-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathloading.py
More file actions
3416 lines (2814 loc) · 162 KB
/
loading.py
File metadata and controls
3416 lines (2814 loc) · 162 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import warnings
from allensdk.internal.api import PostgresQueryMixin
from allensdk.brain_observatory.behavior.behavior_session import BehaviorSession
from allensdk.brain_observatory.behavior.behavior_ophys_experiment import BehaviorOphysExperiment
from allensdk.brain_observatory.behavior.behavior_project_cache import VisualBehaviorOphysProjectCache as bpc
from visual_behavior.data_access import filtering
from visual_behavior.data_access import reformat
from visual_behavior.data_access import utilities
from visual_behavior.data_access import from_lims
import visual_behavior.database as db
import os
import glob
import h5py # for loading motion corrected movie
import numpy as np
import pandas as pd
import configparser as configp # for parsing scientifica ini files
# parser for Scientifica .ini files (used by movie-loading helpers elsewhere in this module)
config = configp.ConfigParser()
try:
    # LIMS / mtrain database credentials are read from environment variables;
    # internal AIBS users must define these for database-backed functions to work
    lims_dbname = os.environ["LIMS_DBNAME"]
    lims_user = os.environ["LIMS_USER"]
    lims_host = os.environ["LIMS_HOST"]
    lims_password = os.environ["LIMS_PASSWORD"]
    lims_port = os.environ["LIMS_PORT"]
    mtrain_dbname = os.environ["MTRAIN_DBNAME"]
    mtrain_user = os.environ["MTRAIN_USER"]
    mtrain_host = os.environ["MTRAIN_HOST"]
    mtrain_password = os.environ["MTRAIN_PASSWORD"]
    mtrain_port = os.environ["MTRAIN_PORT"]
    # module-level query engines shared by the database access functions below
    lims_engine = PostgresQueryMixin(
        dbname=lims_dbname,
        user=lims_user,
        host=lims_host,
        password=lims_password,
        port=lims_port
    )
    mtrain_engine = PostgresQueryMixin(
        dbname=mtrain_dbname,
        user=mtrain_user,
        host=mtrain_host,
        password=mtrain_password,
        port=mtrain_port
    )
except Exception as e:
    # credentials unavailable: warn but do not crash at import time;
    # only functions that actually query the databases will fail later
    warn_string = 'failed to set up LIMS/mtrain credentials\n{}\n\ninternal AIBS users should set up environment variables appropriately\nfunctions requiring database access will fail'.format(
        e)
    warnings.warn(warn_string)
# function inputs
# ophys_experiment_id
# ophys_session_id
# behavior_session_id
# ophys_container_id
def get_flagged_ophys_experiment_ids():
    '''
    Return the ophys_experiment_ids that are currently in the release dataset
    but have been flagged for removal.

    Before adding an experiment here, make an SDK github issue and record the
    issue number next to the id, so the filtered-out data can be tracked.
    '''
    flagged_oeids = [
        856938751,  # SDK#794, extreme variability in image timing
    ]
    # Multiscope sessions with 9Hz frame rate, list from: https://github.com/AllenInstitute/visual_behavior_glm/blob/fd805ab59b81c1b60021604eb12246667aab0941/visual_behavior_glm/GLM_visualization_tools.py#L122
    # ophys_session_ids = [873720614, 962045676, 1048363441, 1049240847, 1050231786, 1050597678, 1051107431, 1051319542, 1052096166,
    #                      1052330675, 1052512524, 1056065360, 1056238781, 1052752249, 1049240847, 1050929040, 1052330675]
    # Marina's notes on problematic experiments (not yet added above):
    # ophys_session_id = 919888953, SDK#2216, OPHYS_3 listed but novel image set suspected to have been shown based on activity profile
    #   ophys_experiment_ids for 919888953: [920288855, 920288849, 920288853, 920288851, 920288845, 920288843]
    # ophys_session_id = 931326814, SDK#2215 and 2202 report mouse was shown image set B for session 2 when it should have been A
    #   this will also affect the novelty of the first novel session for this mouse (453988), but that has not been added here yet
    #   expt ids for 931326814: [932372699, 932372701, 932372707, 932372711, 932372705]
    # ophys_session_id = 875259383, SDK#2202 incorrectly shown OPHYS_6_images_A, but
    #   this session no longer shows up in the ophys experiment table for some reason
    return flagged_oeids
# RELEVANT DIRECTORIES
def get_platform_analysis_cache_dir():
    """
    Return the cache directory used for all platform paper analysis.

    This cache contains NWB files downloaded directly from AWS.
    (Windows UNC form would be \\allen\programs\braintv\workgroups\nc-ophys\visual_behavior\platform_paper_cache)
    """
    cache_dir = '//allen/programs/braintv/workgroups/nc-ophys/visual_behavior/platform_paper_cache'
    return cache_dir
def get_production_cache_dir():
    """Return the directory holding a manifest covering all VB production data, including failed experiments."""
    return r'//allen/programs/braintv/workgroups/nc-ophys/visual_behavior/2020_cache/production_cache'
def get_qc_plots_dir():
    """Return the network directory where QC plots are saved."""
    qc_dir = r'//allen/programs/braintv/workgroups/nc-ophys/visual_behavior/qc_plots'
    return qc_dir
def get_super_container_plots_dir():
    """Return the directory where super-container level QC plots are saved."""
    plots_dir = r'//allen/programs/braintv/workgroups/nc-ophys/visual_behavior/qc_plots/super_container_plots'
    return plots_dir
def get_container_plots_dir():
    """Return the directory where container level QC plots are saved."""
    plots_dir = r'//allen/programs/braintv/workgroups/nc-ophys/visual_behavior/qc_plots/container_plots'
    return plots_dir
def get_session_plots_dir():
    """Return the directory where session level QC plots are saved."""
    plots_dir = r'//allen/programs/braintv/workgroups/nc-ophys/visual_behavior/qc_plots/session_plots'
    return plots_dir
def get_experiment_plots_dir():
    """Return the directory where experiment level QC plots are saved."""
    plots_dir = r'//allen/programs/braintv/workgroups/nc-ophys/visual_behavior/qc_plots/experiment_plots'
    return plots_dir
def get_single_cell_plots_dir():
    """Return the directory where single cell QC plots are saved."""
    plots_dir = r'//allen/programs/braintv/workgroups/nc-ophys/visual_behavior/qc_plots/single_cell_plots'
    return plots_dir
def get_analysis_cache_dir():
    """Return the visual behavior production analysis cache directory."""
    cache_dir = r'//allen/programs/braintv/workgroups/nc-ophys/visual_behavior/visual_behavior_production_analysis'
    return cache_dir
def get_events_dir():
    """Return the directory containing event detection output."""
    events_dir = r'//allen/programs/braintv/workgroups/nc-ophys/visual_behavior/event_detection'
    return events_dir
def get_behavior_model_outputs_dir():
    """Return the directory containing behavior model output files."""
    outputs_dir = r'//allen/programs/braintv/workgroups/nc-ophys/visual_behavior/behavior_model_output'
    return outputs_dir
def get_decoding_analysis_dir():
    """Return the directory containing decoding analysis output."""
    decoding_dir = r'//allen/programs/braintv/workgroups/nc-ophys/visual_behavior/decoding'
    return decoding_dir
def get_ophys_glm_dir():
    """Return the directory containing ophys GLM output."""
    glm_dir = r'//allen/programs/braintv/workgroups/nc-ophys/visual_behavior/ophys_glm'
    return glm_dir
def get_stimulus_response_df_dir(interpolate=True, output_sampling_rate=30, event_type='all'):
    """
    Return (creating it if necessary) the directory where stimulus response dataframes are saved.

    :param interpolate: if True, use the subdirectory for traces interpolated to output_sampling_rate
    :param output_sampling_rate: sampling rate in Hz, used to name the interpolated subdirectory
    :param event_type: currently unused; kept for interface compatibility with callers
    :return: path to the save directory (under the platform analysis cache dir)
    """
    base_dir = get_platform_analysis_cache_dir()
    if interpolate:
        sub_dir = 'interpolate_' + str(output_sampling_rate) + 'Hz'
    else:
        sub_dir = 'original_frame_rate'
    save_dir = os.path.join(base_dir, 'stimulus_response_dfs', sub_dir)
    # makedirs(exist_ok=True) creates the intermediate 'stimulus_response_dfs'
    # level if missing (os.mkdir would raise FileNotFoundError) and avoids the
    # exists-then-create race of the previous os.path.exists + os.mkdir pattern
    os.makedirs(save_dir, exist_ok=True)
    return save_dir
def get_multi_session_df_dir(interpolate=True, output_sampling_rate=30, event_type='all'):
    """
    Return (creating it if necessary) the directory where multi-session mean response dataframes are saved.

    :param interpolate: if True, use the subdirectory for traces interpolated to output_sampling_rate
    :param output_sampling_rate: sampling rate in Hz, used to name the interpolated subdirectory
    :param event_type: currently unused; kept for interface compatibility with callers
    :return: path to the save directory (under the platform analysis cache dir)
    """
    base_dir = get_platform_analysis_cache_dir()
    if interpolate:
        sub_dir = 'interpolate_' + str(output_sampling_rate) + 'Hz'
    else:
        sub_dir = 'original_frame_rate'
    save_dir = os.path.join(base_dir, 'multi_session_mean_response_dfs', sub_dir)
    # makedirs(exist_ok=True) creates the intermediate directory level if missing
    # (os.mkdir would raise FileNotFoundError) and avoids the exists/create race
    os.makedirs(save_dir, exist_ok=True)
    return save_dir
# def get_multi_session_df_dir(interpolate=True, output_sampling_rate=30, event_type='all'):
# base_dir = get_platform_analysis_cache_dir()
# if interpolate:
# save_dir = os.path.join(base_dir, 'multi_session_mean_response_dfs', event_type, 'interpolate_' + str(output_sampling_rate) + 'Hz')
# else:
# save_dir = os.path.join(base_dir, 'multi_session_mean_response_dfs', event_type, 'original_frame_rate')
# if not os.path.exists(save_dir):
# os.mkdir(save_dir)
# return save_dir
def get_manifest_path():
    """
    Return the path to the default manifest file for analysis.

    The default manifest lives in the production cache directory at
    /visual_behavior/2020_cache/production_cache; it covers all VB production
    data and is not the same thing as the platform paper cache.
    """
    return os.path.join(get_production_cache_dir(), "manifest.json")
def get_visual_behavior_cache(from_s3=True, release_data_only=True, cache_dir=None):
    """
    Get the visual behavior dataset cache object from s3 or lims.

    :param from_s3: If True, load the manifest from s3 and save it to cache_dir
        (falls back to the default platform analysis cache dir when None)
    :param release_data_only: when loading from lims, limit to data released on
        March 25th and August 12th
    :param cache_dir: directory where manifest & data files are saved when using s3
    :return: SDK cache object
    """
    if not from_s3:
        if release_data_only:
            return bpc.from_lims(data_release_date=['2021-03-25', '2021-08-12'])
        return bpc.from_lims()
    if cache_dir is None:
        cache_dir = get_platform_analysis_cache_dir()
    print(cache_dir)
    return bpc.from_s3_cache(cache_dir=cache_dir)
def get_released_ophys_experiment_table(exclude_ai94=True):
    '''
    Get the released ophys experiment table from lims.

    Keyword Arguments:
        exclude_ai94 {bool} -- If True, exclude data from mice with Ai94(GCaMP6s) as the reporter line. (default: {True})

    Returns:
        experiment_table -- dataframe with ophys_experiment_id as the index and metadata as columns.
    '''
    print('getting experiment table from lims, NOT AWS')
    lims_cache = bpc.from_lims(data_release_date=['2021-03-25', '2021-08-12'])
    released_experiments = lims_cache.get_ophys_experiment_table()
    if exclude_ai94:
        released_experiments = released_experiments.query('reporter_line != "Ai94(TITL-GCaMP6s)"')
    return released_experiments
def get_platform_paper_experiment_table(add_extra_columns=True, limit_to_closest_active=False, include_4x2_data=False, remove_flagged=True):
    """
    Load the experiment table that was downloaded from AWS and saved to the platform paper cache dir,
    then filter out VisualBehaviorMultiscope4areasx2d (optional) and Ai94 data,
    and add cell_type column (values = ['Excitatory', 'Sst Inhibitory', 'Vip Inhibitory']).

    Set add_extra_columns to False if you dont need things like 'cell_type', 'binned_depth', or 'add_last_familiar'.
    Set limit_to_closest_active to True if you want to limit to experiments that are matched in all experience levels,
    with only the closest familiar and novel active sessions to the first novel session included
    (i.e. only one session of each type per container).
    include_4x2_data (bool), if True, then includes VisualBehaviorMultiscope4areasx2d data
    remove_flagged (bool), set remove_flagged to False if you want to include experiments that are currently in the
    release dataset but have been flagged for removal (see get_flagged_ophys_experiment_ids)
    """
    cache_dir = get_platform_analysis_cache_dir()
    cache = bpc.from_s3_cache(cache_dir=cache_dir)
    experiment_table = cache.get_ophys_experiment_table()
    # remove 4x2 and Ai94 data (Ai94 is always removed; 4x2 only when include_4x2_data is False)
    if include_4x2_data:
        experiment_table = experiment_table[(experiment_table.reporter_line != 'Ai94(TITL-GCaMP6s)')].copy()
    else:
        experiment_table = experiment_table[(experiment_table.project_code != 'VisualBehaviorMultiscope4areasx2d') &
                                            (experiment_table.reporter_line != 'Ai94(TITL-GCaMP6s)')].copy()
    # overwrite session number and passive columns to patch for bug flagged in this SDK issue:
    # https://github.com/AllenInstitute/AllenSDK/issues/2251
    experiment_table = utilities.add_session_number_to_experiment_table(experiment_table)
    experiment_table = utilities.add_passive_flag_to_ophys_experiment_table(experiment_table)
    if add_extra_columns:
        # add cell type and binned depth columms for plot labels
        experiment_table = utilities.add_cell_type_column(experiment_table)
        experiment_table = utilities.add_average_depth_across_container(experiment_table)
        experiment_table = utilities.add_binned_depth_column(experiment_table)
        experiment_table = utilities.add_area_depth_column(experiment_table)
        experiment_table = utilities.add_layer_column(experiment_table)
        experiment_table = utilities.add_area_layer_column(experiment_table)
        # add other columns indicating whether a session was the last familiar before the first novel session,
        # or the second passing novel session after the first truly novel one
        experiment_table = utilities.add_date_string(experiment_table)  # add simplified date string for sorting
        experiment_table = utilities.add_first_novel_column(experiment_table)
        experiment_table = utilities.add_n_relative_to_first_novel_column(experiment_table)
        experiment_table = utilities.add_last_familiar_column(experiment_table)
        experiment_table = utilities.add_last_familiar_active_column(experiment_table)
        experiment_table = utilities.add_second_novel_column(experiment_table)
        experiment_table = utilities.add_second_novel_active_column(experiment_table)
        # add column that has a combination of experience level and exposure to omissions for familiar sessions,
        # or exposure to image set for novel sessions
        experiment_table = utilities.add_experience_exposure_column(experiment_table)
    if remove_flagged:
        # Remove flagged ophys experiment ids (drops rows by index)
        flagged_oeids = get_flagged_ophys_experiment_ids()
        experiment_table = experiment_table.drop(flagged_oeids, axis=0)
    if limit_to_closest_active:
        experiment_table = utilities.limit_to_last_familiar_second_novel_active(experiment_table)
        experiment_table = utilities.limit_to_containers_with_all_experience_levels(experiment_table)
    return experiment_table
def get_platform_paper_behavior_session_table(include_4x2_data=False, add_extra_columns=True):
    """
    Load the behavior sessions table that was downloaded from AWS and saved to the platform paper cache dir,
    then filter out sessions with NaN session_type and Ai94 data (and optionally
    VisualBehaviorMultiscope4areasx2d data), and add cell_type and other sorting columns.

    include_4x2_data (bool), if True, include Multiscope 4areas x 2 depth data
    add_extra_columns (bool), whether or not to add a bunch of useful columns to sort behavior sessions by
        such as whether a session has ophys, the abbreviated stimulus name, cell type, etc.
    """
    cache_dir = get_platform_analysis_cache_dir()
    cache = bpc.from_s3_cache(cache_dir=cache_dir)
    behavior_sessions = cache.get_behavior_session_table()
    # reset index to retain behavior_session_id during the below transformations
    behavior_sessions = behavior_sessions.reset_index()
    # add project codes to behavior sessions
    experiments_table = cache.get_ophys_experiment_table()
    behavior_sessions = utilities.add_project_code_to_behavior_sessions(behavior_sessions, experiments_table)
    # get rid of NaNs, documented in SDK#2218
    behavior_sessions = behavior_sessions[behavior_sessions.session_type.isnull() == False]
    print(len(behavior_sessions), 'sessions after removing NaN session types')
    # remove 4x2 and Ai94 data (Ai94 is always removed; 4x2 only when include_4x2_data is False)
    if include_4x2_data:
        behavior_sessions = behavior_sessions[(behavior_sessions.reporter_line != 'Ai94(TITL-GCaMP6s)')].copy()
        print(len(behavior_sessions), 'sessions after removing Ai94')
    else:
        behavior_sessions = behavior_sessions[(behavior_sessions.project_code != 'VisualBehaviorMultiscope4areasx2d') &
                                              (behavior_sessions.reporter_line != 'Ai94(TITL-GCaMP6s)')].copy()
        print(len(behavior_sessions), 'sessions after removing VisualBehaviorMultiscope4areasx2d and Ai94')
    if add_extra_columns:
        # overwrite session number and passive columns to patch for bug flagged in this SDK issue:
        # https://github.com/AllenInstitute/AllenSDK/issues/2251
        behavior_sessions = utilities.add_session_number_to_experiment_table(behavior_sessions)
        behavior_sessions = utilities.add_passive_flag_to_ophys_experiment_table(behavior_sessions)
        behavior_sessions = utilities.add_cell_type_column(behavior_sessions)
        # add experience_level column
        behavior_sessions = utilities.add_experience_level_to_behavior_sessions(behavior_sessions)
        # add training stage (abbreviated session type) and first and last day of training stage
        behavior_sessions = utilities.add_first_last_day_of_stage_to_behavior_sessions(behavior_sessions)
        # add experiment phase (OPHYS vs TRAINING), and stimulus type (gratings_static, images_A, etc)
        behavior_sessions = utilities.add_has_ophys_column_to_behavior_sessions(behavior_sessions)
        behavior_sessions = utilities.add_experiment_phase_to_behavior_sessions(behavior_sessions)
        behavior_sessions = utilities.add_stimulus_to_table(behavior_sessions)
        behavior_sessions = utilities.add_first_last_day_of_stimulus_to_behavior_sessions(behavior_sessions)
        # add stimulus phase column (ex: 'gratings_flashed_training', 'images_A_training', 'images_A_ophys')
        behavior_sessions = utilities.add_stimulus_phase_to_behavior_sessions(behavior_sessions)
        # verify that nothing has been accidentally removed
        print(len(behavior_sessions), 'sessions after adding extra columns')
    # reset the index to behavior_session_id
    behavior_sessions = behavior_sessions.set_index('behavior_session_id')
    return behavior_sessions
def get_filtered_ophys_experiment_table(include_failed_data=False, release_data_only=True, exclude_ai94=True,
                                        add_extra_columns=False, from_cached_file=False, overwrite_cached_file=False):
    """
    Load a list of available ophys experiments FROM LIMS (not S3 cache) and add additional useful columns.

    Keyword Arguments:
        include_failed_data {bool} -- If True, return all experiments including those from failed containers
            and receptive field mapping experiments; if False, only experiments that passed experiment level QC.
            Setting this to True automatically sets release_data_only to False.
            There is no guarantee on data quality or reprocessing for these experiments.
        release_data_only {bool} -- If True, return only experiments released on March 25th, 2021 and
            August 12th, 2021 (project_codes = ['VisualBehavior', 'VisualBehaviorTask1B', 'VisualBehaviorMultiscope']).
            Fail tags and other extra columns will not be added in this case.
            If False, return all Visual Behavior ophys experiments collected, including
            project_code = 'VisualBehaviorMultiscope4areasx2d' (no quality guarantees).
        exclude_ai94 {bool} -- If True, exclude data from mice with Ai94(GCaMP6s) as the reporter line. (default: {True})
        add_extra_columns {bool} -- If True, add fail tags, model availability and location string columns (slow).
        from_cached_file {bool} -- If True, load the experiments table from the saved csv in the default cache
            location (returned by get_production_cache_dir()); only consulted when release_data_only is False.
        overwrite_cached_file {bool} -- If True, save the resulting table to the default cache folder,
            overwriting any existing file.

    Returns:
        experiment_table -- dataframe with ophys_experiment_id as the index and metadata as columns.

    Raises:
        FileNotFoundError -- if from_cached_file is True but no cached csv exists.
            (Previously this case only printed a message and then crashed later with a
            NameError on the never-assigned table; the explicit error is clearer.)
    """
    if include_failed_data is True:
        # failed experiments are by definition not in the release set
        release_data_only = False
    if release_data_only:
        # get cache from lims for data released on March 25th and August 12th
        print('getting experiment table for March and August releases from lims')
        cache = bpc.from_lims(data_release_date=['2021-03-25', '2021-08-12'])
        experiments = cache.get_ophys_experiment_table()
    if not release_data_only:
        if from_cached_file:
            filepath = os.path.join(get_production_cache_dir(), 'filtered_ophys_experiment_table.csv')
            if not os.path.exists(filepath):
                raise FileNotFoundError(
                    'there is no filtered_ophys_experiment_table.csv in {}'.format(get_production_cache_dir()))
            print('loading cached experiment_table')
            print('last updated on:')
            import time
            print(time.ctime(os.path.getctime(filepath)))
            # load the cached file (already filtered/reformatted when it was saved)
            experiments = pd.read_csv(filepath)
        else:
            print('getting up-to-date experiment_table from lims')
            # get everything in lims
            cache = bpc.from_lims()
            experiments = cache.get_ophys_experiment_table(passed_only=False)
            # limit to the 4 VisualBehavior project codes
            experiments = filtering.limit_to_production_project_codes(experiments)
            if add_extra_columns:
                print('adding extra columns')
                print('NOTE: this is slow. set from_cached_file to True to load cached version of experiments_table at:')
                print(get_production_cache_dir())
                # create cre_line column, set NaN session_types to None, add model output availability and location columns
                experiments = reformat.reformat_experiments_table(experiments)
            if include_failed_data:
                print('including failed data')
            else:
                print('limiting to passed experiments')
                experiments = filtering.limit_to_passed_experiments(experiments)
                experiments = filtering.remove_failed_containers(experiments)  # container_workflow_state can be anything other than 'failed'
            # limit to sessions that start with OPHYS
            print('limiting to sessions that start with OPHYS')
            experiments = filtering.limit_to_valid_ophys_session_types(experiments)
    # ensure ophys_experiment_id is the (unique) index regardless of the load path
    if experiments.index.name != 'ophys_experiment_id':
        experiments = experiments.drop_duplicates(subset='ophys_experiment_id')
        experiments = experiments.set_index('ophys_experiment_id')
    if exclude_ai94:
        print('excluding Ai94 data')
        experiments = experiments[experiments.full_genotype != 'Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai94(TITL-GCaMP6s)/wt']
    if 'cre_line' not in experiments.keys():
        # cre line is the driver portion of the full genotype string
        experiments['cre_line'] = [full_genotype.split('/')[0] for full_genotype in experiments.full_genotype.values]
    # filter one more time on load to restrict to Visual Behavior project experiments ###
    experiments = filtering.limit_to_production_project_codes(experiments)
    # add new columns for conditions to analyze for platform paper ###
    experiments = utilities.add_cell_type_column(experiments)
    if overwrite_cached_file == True:
        print('overwriting pre-saved experiments table file')
        experiments.to_csv(os.path.join(get_production_cache_dir(), 'filtered_ophys_experiment_table.csv'))
    return experiments
def get_filtered_ophys_session_table(release_data_only=True, include_failed_data=False):
    """
    Get the ophys sessions table from the SDK, add container_id / container_workflow_state,
    add session_workflow_state (defined as >1 experiment within the session passing),
    and return only sessions where container and session workflow states are 'passed'.
    Includes Multiscope data.

    filtering criteria:
        project codes: VisualBehavior, VisualBehaviorTask1B,
                       VisualBehaviorMultiscope, VisualBehaviorMultiscope4areasx2d
        session workflow state: "passed"
        session_type: OPHYS_1/3/4/6_images_{A,B,G,H} and OPHYS_2/5_images_{A,B,G,H}_passive

    Returns:
        dataframe -- filtered ophys sessions table indexed by ophys_session_id, with columns
        including ophys_experiment_id, project_code, session_name, session_type, equipment_name,
        date_of_acquisition, specimen_id, reporter_line, driver_line,
        at_least_one_experiment_passed, session_workflow_state, container_id,
        container_workflow_state.
    """
    sessions = bpc.from_lims().get_ophys_session_table()
    # the experiment-level QC states are needed to derive session workflow state;
    # use the cached experiment table except when restricted to release data
    experiment_table = get_filtered_ophys_experiment_table(
        release_data_only=release_data_only,
        include_failed_data=include_failed_data,
        from_cached_file=not release_data_only)
    sessions = filtering.limit_to_production_project_codes(sessions)
    sessions = reformat.add_all_qc_states_to_ophys_session_table(sessions, experiment_table)
    sessions = filtering.limit_to_valid_ophys_session_types(sessions)
    sessions = filtering.limit_to_passed_ophys_sessions(sessions)
    sessions = filtering.remove_failed_containers(sessions)
    sessions = reformat.add_model_outputs_availability_to_table(sessions)
    return sessions
def get_filtered_behavior_session_table(release_data_only=True):
    """
    Load the list of behavior sessions from the SDK BehaviorProjectCache and do basic filtering
    and column additions, such as changing mouse_id from str to int and adding project code.

    Keyword Arguments:
        release_data_only {bool} -- If True, only return behavior sessions for mice with ophys data
            that will be included in the March data release. This excludes
            OPHYS_7_receptive_field_mapping and sessions with an unexpected session_type
            (i.e. where session_type is NaN).

    Returns:
        behavior_sessions -- Dataframe with behavior_session_id as the index and metadata as columns.
    """
    cache = bpc.from_lims()
    behavior_sessions = cache.get_behavior_session_table()
    behavior_sessions = behavior_sessions.reset_index()
    # make mouse_id an int not string
    behavior_sessions['mouse_id'] = [int(mouse_id) for mouse_id in behavior_sessions.mouse_id.values]
    # add project code from experiments table
    all_experiments = cache.get_ophys_experiment_table()
    all_experiments['mouse_id'] = [int(mouse_id) for mouse_id in all_experiments.mouse_id.values]
    # FIX: merge the deduplicated (mouse_id, project_code) pairs. The previous merge on
    # all_experiments[['mouse_id']] never actually added the project_code column and
    # duplicated each behavior session once per ophys experiment of that mouse.
    # The inner merge still restricts to mice that have ophys experiments, as before.
    mouse_projects = all_experiments[['mouse_id', 'project_code']].drop_duplicates()
    behavior_sessions = behavior_sessions.merge(mouse_projects, on='mouse_id')
    if release_data_only:
        # limit to mice that are in the data release & have a valid session_type
        release_experiments = get_filtered_ophys_experiment_table(release_data_only=True)
        release_mice = release_experiments.mouse_id.unique()
        behavior_sessions = behavior_sessions[behavior_sessions.mouse_id.isin(release_mice)]
        behavior_sessions = behavior_sessions[behavior_sessions.session_type.isnull() == False]
        behavior_sessions = behavior_sessions[behavior_sessions.session_type != 'OPHYS_7_receptive_field_mapping']
        # flag sessions that have passing ophys data in the release
        behavior_sessions['has_passing_ophys_data'] = [True if behavior_session_id in release_experiments.behavior_session_id.values
                                                       else False for behavior_session_id in behavior_sessions.behavior_session_id]
    behavior_sessions = behavior_sessions.drop_duplicates(subset=['behavior_session_id'])
    behavior_sessions = behavior_sessions.set_index('behavior_session_id')
    return behavior_sessions
def get_second_release_candidates():
    """
    Preliminary function to get candidate experiments for the August release. Will be revised.
    :return: dataframe of candidate ophys experiments
    """
    full_cache = bpc.from_lims()
    full_experiment_table = full_cache.get_ophys_experiment_table()
    passed = full_experiment_table.experiment_workflow_state == 'passed'
    # passed Multiscope experiments from completed / QC'd containers
    multiscope_mask = (full_experiment_table.project_code.isin(['VisualBehaviorMultiscope'])
                       & full_experiment_table.container_workflow_state.isin(['completed', 'container_qc'])
                       & passed)
    unreleased_complete_multiscope = full_experiment_table[multiscope_mask]
    print(len(unreleased_complete_multiscope), 'un-released VisualBehaviorMultiscope where container_workflow_state == completed')
    # passed 4areas x 2depth experiments from any non-failed container
    mask_4x2 = (full_experiment_table.project_code.isin(['VisualBehaviorMultiscope4areasx2d'])
                & (full_experiment_table.container_workflow_state != 'failed')
                & passed)
    unreleased_not_failed_4x2 = full_experiment_table[mask_4x2]
    print(len(unreleased_not_failed_4x2), 'un-released VisualBehaviorMultiscope4areasx2d where container_workflow_state != failed')
    release_candidates = pd.concat([unreleased_complete_multiscope, unreleased_not_failed_4x2])
    print(len(release_candidates), 'release candidates')
    return release_candidates
def get_extended_stimulus_presentations_table(stimulus_presentations, licks, rewards, running_speed, eye_tracking=None, behavior_session_id=None):
    """
    Takes SDK stimulus presentations table and adds a bunch of useful columns by incorporating data from other tables
    and reformatting existing column data.

    Additional columns include epoch #s for 10 minute bins in the session, whether a flash was a pre or post change or omission,
    the mean running speed per flash, mean pupil area per flash, licks per flash, rewards per flash, lick rate, reward rate,
    time since last change, time since last omission, time since last lick.

    :param stimulus_presentations: SDK stimulus_presentations table (one row per stimulus flash)
    :param licks: SDK licks table; may carry lick times in a 'time' or 'timestamps' column
    :param rewards: SDK rewards table
    :param running_speed: SDK running_speed table (currently unused -- the mean-running-speed annotation is commented out below)
    :param eye_tracking: SDK eye_tracking table; set to None by default so that things still run for behavior only sessions
    :param behavior_session_id: if provided, will load metrics from behavior model outputs file
                                (currently disabled -- see commented block at the end)
    :return: annotated stimulus_presentations DataFrame
    """
    # normalize the lick timestamp column name across SDK versions
    if 'time' in licks.keys():
        licks = licks.rename(columns={'time': 'timestamps'})
    # drop grating-specific columns when present so downstream annotation sees a uniform schema
    if 'orientation' in stimulus_presentations.columns:
        stimulus_presentations = stimulus_presentations.drop(columns=['orientation', 'image_set', 'index',
                                                                      'phase', 'spatial_frequency'])
    stimulus_presentations = reformat.add_change_each_flash(stimulus_presentations)
    # shift(-1) marks the flash immediately BEFORE a change / an omission
    stimulus_presentations['pre_change'] = stimulus_presentations['change'].shift(-1)
    stimulus_presentations['pre_omitted'] = stimulus_presentations['omitted'].shift(-1)
    stimulus_presentations = reformat.add_epoch_times(stimulus_presentations)
    # stimulus_presentations = reformat.add_mean_running_speed(stimulus_presentations, running_speed)
    if eye_tracking is not None:
        try:  # if eye tracking data is not present or cant be loaded
            stimulus_presentations = reformat.add_mean_pupil_area(stimulus_presentations, eye_tracking)
        except BaseException:  # set to NaN
            stimulus_presentations['mean_pupil_area'] = np.nan
    stimulus_presentations = reformat.add_licks_each_flash(stimulus_presentations, licks)
    stimulus_presentations = reformat.add_response_latency(stimulus_presentations)
    stimulus_presentations = reformat.add_rewards_each_flash(stimulus_presentations, rewards)
    # note: the comprehension variable shadows the `licks` argument only inside the comprehension scope (Python 3)
    stimulus_presentations['licked'] = [True if len(licks) > 0 else False for licks in
                                        stimulus_presentations.licks.values]
    # lick rate per second: triangular-weighted rolling mean over 320 flashes, scaled by 1 flash / 0.75 s
    stimulus_presentations['lick_rate'] = stimulus_presentations['licked'].rolling(window=320, min_periods=1,
                                                                                   win_type='triang').mean() / .75
    stimulus_presentations['rewarded'] = [True if len(rewards) > 0 else False for rewards in stimulus_presentations.rewards.values]
    # (rewards/stimulus)*(1 stimulus/.750s) = rewards/second
    stimulus_presentations['reward_rate_per_second'] = stimulus_presentations['rewarded'].rolling(window=320, min_periods=1,
                                                                                                 win_type='triang').mean() / .75  # units of rewards per second
    # (rewards/stimulus)*(1 stimulus/.750s)*(60s/min) = rewards/min
    stimulus_presentations['reward_rate'] = stimulus_presentations['rewarded'].rolling(window=320, min_periods=1, win_type='triang').mean() * (60 / .75)  # units of rewards/min
    reward_threshold = 2 / 3  # 2/3 rewards per minute = 1/90 rewards/second
    # NOTE: This method of calculating reward rate only works for sessions with flashes,
    # i.e. it will give incorrect results for TRAINING_0 because there is not 1 stimulus per 0.75s
    stimulus_presentations['engaged'] = [x > reward_threshold for x in stimulus_presentations['reward_rate']]
    stimulus_presentations['engagement_state'] = ['engaged' if engaged == True else 'disengaged' for engaged in stimulus_presentations['engaged'].values]
    stimulus_presentations = reformat.add_response_latency(stimulus_presentations)
    stimulus_presentations = reformat.add_image_contrast_to_stimulus_presentations(stimulus_presentations)
    stimulus_presentations = reformat.add_time_from_last_lick(stimulus_presentations, licks)
    stimulus_presentations = reformat.add_time_from_last_reward(stimulus_presentations, rewards)
    stimulus_presentations = reformat.add_time_from_last_change(stimulus_presentations)
    try:  # behavior only sessions dont have omissions
        stimulus_presentations = reformat.add_time_from_last_omission(stimulus_presentations)
        stimulus_presentations['flash_after_omitted'] = stimulus_presentations['omitted'].shift(1)
    except BaseException:
        pass
    # shift(1) marks the flash immediately AFTER a change; shift(-1)/shift(1) below copy neighbor-flash values
    stimulus_presentations['flash_after_change'] = stimulus_presentations['change'].shift(1)
    stimulus_presentations['image_name_next_flash'] = stimulus_presentations['image_name'].shift(-1)
    stimulus_presentations['image_index_next_flash'] = stimulus_presentations['image_index'].shift(-1)
    stimulus_presentations['image_name_previous_flash'] = stimulus_presentations['image_name'].shift(1)
    stimulus_presentations['image_index_previous_flash'] = stimulus_presentations['image_index'].shift(1)
    stimulus_presentations['lick_on_next_flash'] = stimulus_presentations['licked'].shift(-1)
    stimulus_presentations['lick_rate_next_flash'] = stimulus_presentations['lick_rate'].shift(-1)
    stimulus_presentations['lick_on_previous_flash'] = stimulus_presentations['licked'].shift(1)
    stimulus_presentations['lick_rate_previous_flash'] = stimulus_presentations['lick_rate'].shift(1)
    # if behavior_session_id:
    #     if check_if_model_output_available(behavior_session_id):
    #         stimulus_presentations = add_model_outputs_to_stimulus_presentations(
    #             stimulus_presentations, behavior_session_id)
    #     else:
    #         print('model outputs not available')
    return stimulus_presentations
def get_stimulus_response_df_filepath_for_experiment(ophys_experiment_id, data_type, event_type,
                                                     interpolate=True, output_sampling_rate=30,
                                                     epoch_duration_mins=5):
    """
    Build the cache filepath for a saved stimulus response dataframe for one experiment.
    The directory encodes interpolation / sampling rate / event type; the filename encodes
    experiment id, data type, event type and epoch duration.
    """
    cache_dir = get_stimulus_response_df_dir(interpolate, int(output_sampling_rate), event_type)
    filename = '{}_{}_{}_epoch_dur_{}.h5'.format(ophys_experiment_id, data_type, event_type, epoch_duration_mins)
    return os.path.join(cache_dir, filename)
def get_stimulus_response_df(dataset, time_window=None, interpolate=True, output_sampling_rate=30,
                             data_type='filtered_events', event_type='all', load_from_file=True, epoch_duration_mins=5):
    """
    Load a dataframe with stimulus aligned traces for all cells (or for a given behavior timeseries) using mindscope_utilities
    and merge it with the annotated stimulus_presentations table that includes behavior metadata.
    Will interpolate traces to desired output_sampling_rate if interpolate = True.
    Works for cell traces (dF/F, events, filtered_events) or behavioral timeseries (running, pupil, lick rate).
    The dataframe can include all stimulus_presentations in the session (event_type='all')
    or it can be limited to just changes (event_type='changes') or omissions (event_type='omissions') to make loading faster.
    If the response_df has been pre-generated and saved to the default cache directory, this function will load the
    pre-existing file if load_from_file=True; when the cached file exists but cannot be read, it is regenerated and re-saved.

    inputs:
        dataset: BehaviorOphysExperiment instance
        time_window: window over which to extract the event triggered response around each stimulus presentation time;
                     defaults to [-3, 3.1] when None (None sentinel avoids a shared mutable default argument)
        interpolate: Boolean, whether or not to interpolate traces
        output_sampling_rate: sampling rate for interpolation, only used if interpolate is True
        data_type: which timeseries to get event triggered responses for
                   options: 'filtered_events', 'events', 'dff', 'running_speed', 'pupil_diameter', 'lick_rate'
        event_type: 'all', 'changes', or 'omissions'
        load_from_file: if True, load a pre-generated dataframe from the cache directory when available
        epoch_duration_mins: duration in minutes to use when creating 'epoch' column in the annotated stimulus_presentations table;
                             'epoch' column provides an integer value of the epoch # to which each stimulus presentation belongs
    output:
        stimulus_response_df: pandas dataframe
            if data_type is 'filtered_events', 'events', or 'dff', each row is one cell's response to one
            stimulus_presentation (total length n_cells x n_stimulus_presentations);
            if data_type is 'running_speed', 'pupil_diameter', or 'lick_rate', each row is the stimulus aligned
            timeseries for one stimulus_presentation (total length n_stimulus_presentations);
            columns include stimulus and behavior metadata
    """
    import mindscope_utilities.visual_behavior_ophys.data_formatting as vb_ophys

    if time_window is None:
        time_window = [-3, 3.1]
    ophys_experiment_id = dataset.ophys_experiment_id
    filepath = get_stimulus_response_df_filepath_for_experiment(ophys_experiment_id, data_type, event_type,
                                                                interpolate=interpolate, output_sampling_rate=output_sampling_rate,
                                                                epoch_duration_mins=epoch_duration_mins)
    # omission responses are evaluated over a longer window than other event types
    if event_type == 'omissions':
        response_window_duration = 0.75
    else:
        response_window_duration = 0.5

    def _generate_response_df():
        # single place that computes the stimulus aligned response dataframe
        # (was duplicated verbatim in three branches)
        print('generating response df')
        return vb_ophys.get_stimulus_response_df(dataset, data_type=data_type, event_type=event_type,
                                                 time_window=time_window, interpolate=interpolate,
                                                 output_sampling_rate=output_sampling_rate,
                                                 response_window_duration=response_window_duration)

    sdf = None
    if load_from_file and os.path.exists(filepath):
        try:  # attempt to load from file
            print('file exists:', filepath)
            print('loading response df from file for', ophys_experiment_id, data_type, event_type)
            sdf = pd.read_hdf(filepath, key='df')
        except Exception as e:  # if it cant be loaded for whatever reason, re-create it and save it
            print('stimulus_response_df does not exist or could not be loaded for', filepath)
            print(e)
            sdf = _generate_response_df()
            try:  # some experiments with lots of neurons cant save
                sdf.to_hdf(filepath, key='df')
                print('saved response df to', filepath)
            except BaseException:
                print('could not save', filepath)
    if sdf is None:  # cache miss, or load_from_file is False
        sdf = _generate_response_df()
    # if extended_stimulus_presentations is an attribute of the dataset object, use it, otherwise get annotated stimulus_presentations
    if 'extended_stimulus_presentations' in dir(dataset):
        stimulus_presentations = dataset.extended_stimulus_presentations.copy()
    else:
        stimulus_presentations = vb_ophys.get_annotated_stimulus_presentations(dataset, epoch_duration_mins=epoch_duration_mins)
    sdf = sdf.merge(stimulus_presentations, on='stimulus_presentations_id')
    return sdf
# LOAD OPHYS DATA FROM SDK AND EDIT OR ADD METHODS/ATTRIBUTES WITH BUGS OR INCOMPLETE FEATURES #
class BehaviorOphysDataset(BehaviorOphysExperiment):
    """
    Loads SDK ophys experiment object and 1) optionally filters out invalid ROIs, 2) adds extended_stimulus_presentations table,
    3) adds extended_trials table, 4) adds behavior movie PCs, predictions and timestamps.

    Returns:
        BehaviorOphysDataset {class} -- object with attributes & methods to access ophys and behavior data
                                        associated with an ophys_experiment_id (single imaging plane)
    """

    def __init__(self, api, include_invalid_rois=False,
                 eye_tracking_z_threshold: float = 3.0, eye_tracking_dilation_frames: int = 2,
                 events_filter_scale: float = 2.0, events_filter_n_time_steps: int = 20):
        """
        :param api: instance of an allenSDK BehaviorOphys API object for one ophys_experiment_id
        :param include_invalid_rois: if True, do not filter out invalid ROIs from cell_specimen_table and traces
        :param eye_tracking_z_threshold: passed through to the SDK eye tracking processing
        :param eye_tracking_dilation_frames: passed through to the SDK eye tracking processing
        :param events_filter_scale: passed through to the SDK event filtering
        :param events_filter_n_time_steps: passed through to the SDK event filtering
        """
        super().__init__(
            api=api,
            eye_tracking_z_threshold=eye_tracking_z_threshold,
            eye_tracking_dilation_frames=eye_tracking_dilation_frames,
            events_filter_scale=events_filter_scale,
            events_filter_n_time_steps=events_filter_n_time_steps
        )
        self._include_invalid_rois = include_invalid_rois

    def _filter_to_valid_rois(self, traces):
        """Restrict a traces dataframe to rows whose cell_roi_id is marked valid in the SDK cell_specimen_table.
        Shared by corrected_fluorescence_traces, dff_traces and events (was duplicated in each property)."""
        cell_specimen_table = super().cell_specimen_table
        valid_cells = cell_specimen_table[cell_specimen_table.valid_roi == True].cell_roi_id.values
        return traces[traces.cell_roi_id.isin(valid_cells)]

    @property
    def cell_specimen_table(self):
        # optionally restrict the SDK table to valid ROIs
        cell_specimen_table = super().cell_specimen_table.copy()
        if not self._include_invalid_rois:
            cell_specimen_table = cell_specimen_table[cell_specimen_table.valid_roi == True]
        self._cell_specimen_table = cell_specimen_table
        return self._cell_specimen_table

    @property
    def corrected_fluorescence_traces(self):
        if not self._include_invalid_rois:
            self._corrected_fluorescence_traces = self._filter_to_valid_rois(super().corrected_fluorescence_traces)
        else:
            self._corrected_fluorescence_traces = super().corrected_fluorescence_traces
        return self._corrected_fluorescence_traces

    @property
    def dff_traces(self):
        if not self._include_invalid_rois:
            self._dff_traces = self._filter_to_valid_rois(super().dff_traces)
        else:
            self._dff_traces = super().dff_traces
        return self._dff_traces

    @property
    def events(self):
        if not self._include_invalid_rois:
            self._events = self._filter_to_valid_rois(super().events)
        else:
            self._events = super().events
        return self._events

    @property
    def metadata(self):
        # SDK metadata dict; used for figure titles & filenames
        self._metadata = super().metadata
        return self._metadata

    @property
    def metadata_string(self):
        """Compact experiment descriptor for figure titles & filenames:
        mouse_id_experiment_id_driverline_structure_depth_sessiontype_rig."""
        m = self.metadata
        # NOTE(review): assumes equipment_name contains a '.' (e.g. 'MESO.1') -- IndexError otherwise; confirm upstream
        rig_name = m['equipment_name'].split('.')[0] + m['equipment_name'].split('.')[1]
        self._metadata_string = str(m['mouse_id']) + '_' + str(m['ophys_experiment_id']) + '_' + m['driver_line'][
            0] + '_' + m['targeted_structure'] + '_' + str(m['imaging_depth']) + '_' + m['session_type'] + '_' + rig_name
        return self._metadata_string

    @property
    def extended_stimulus_presentations(self):
        # stimulus_presentations annotated with behavior data (licks, rewards, rates, engagement, etc.)
        extended_stimulus_presentations = get_extended_stimulus_presentations_table(
            self.stimulus_presentations.copy(), self.licks, self.rewards,
            self.running_speed, self.eye_tracking)
        self._extended_stimulus_presentations = extended_stimulus_presentations
        return self._extended_stimulus_presentations

    @property
    def extended_trials(self):
        # trials table annotated with epochs, trial type, reward rate and engagement state
        trials = super().trials.copy()
        trials = reformat.add_epoch_times(trials)
        trials = reformat.add_trial_type_to_trials_table(trials)
        trials = reformat.add_reward_rate_to_trials_table(trials, self.extended_stimulus_presentations)
        trials = reformat.add_engagement_state_to_trials_table(trials, self.extended_stimulus_presentations)
        self._extended_trials = trials
        return self._extended_trials

    @property
    def behavior_movie_timestamps(self):
        # timestamps of the behavior monitoring camera, loaded via LIMS utilities
        lims_data = utilities.get_lims_data(self.ophys_experiment_id)
        timestamps = utilities.get_timestamps(lims_data)
        self._behavior_movie_timestamps = timestamps['behavior_monitoring']['timestamps'].copy()
        return self._behavior_movie_timestamps

    @property
    def behavior_movie_pc_masks(self):
        # spatial masks of the behavior movie principal components for this session
        ophys_session_id = from_lims.get_ophys_session_id_for_ophys_experiment_id(self.ophys_experiment_id)
        self._behavior_movie_pc_masks = get_pc_masks_for_session(ophys_session_id)
        return self._behavior_movie_pc_masks

    @property
    def behavior_movie_pc_activations(self):
        # activation timeseries of the behavior movie principal components for this session
        ophys_session_id = from_lims.get_ophys_session_id_for_ophys_experiment_id(self.ophys_experiment_id)
        self._behavior_movie_pc_activations = get_pc_activations_for_session(ophys_session_id)
        return self._behavior_movie_pc_activations

    @property
    def behavior_movie_predictions(self):
        # model predictions derived from the behavior movie, indexed by movie frame
        ophys_session_id = from_lims.get_ophys_session_id_for_ophys_experiment_id(self.ophys_experiment_id)
        movie_predictions = get_behavior_movie_predictions_for_session(ophys_session_id)
        movie_predictions.index.name = 'frame_index'
        movie_predictions['timestamps'] = self.behavior_movie_timestamps[:len(
            movie_predictions)]  # length check will trim off spurious timestamps at the end
        self._behavior_movie_predictions = movie_predictions
        return self._behavior_movie_predictions

    def get_cell_specimen_id_for_cell_index(self, cell_index):
        """Return the cell_specimen_id (table index) for a given cell_index."""
        cell_specimen_table = self.cell_specimen_table.copy()
        cell_specimen_id = cell_specimen_table[cell_specimen_table.cell_index == cell_index].index.values[0]
        return cell_specimen_id

    def get_cell_index_for_cell_specimen_id(self, cell_specimen_id):
        """Return the cell_index for a given cell_specimen_id (table index)."""
        cell_specimen_table = self.cell_specimen_table.copy()
        cell_index = cell_specimen_table[cell_specimen_table.index == cell_specimen_id].cell_index.values[0]
        return cell_index

    def get_cell_specimen_id_for_cell_roi_id(self, cell_roi_id):
        """Return the cell_specimen_id (table index) for a given cell_roi_id."""
        cell_specimen_table = self.cell_specimen_table.copy()
        cell_specimen_id = cell_specimen_table[cell_specimen_table.cell_roi_id == cell_roi_id].index.values[0]
        return cell_specimen_id
def get_ophys_dataset(ophys_experiment_id, include_invalid_rois=False, load_from_lims=False, load_from_nwb=True,
                      get_extended_stimulus_presentations=False, get_behavior_movie_timestamps=False):
    """
    Gets behavior + ophys data for one experiment (single imaging plane), either using the SDK LIMS API
    or the SDK NWB API (S3 platform analysis cache).

    Arguments:
        ophys_experiment_id {int} -- 9 digit ophys experiment ID
        include_invalid_rois {Boolean} -- if True, return all ROIs including invalid. If False, filter out invalid ROIs
        load_from_lims -- if True, loads dataset directly from BehaviorOphysExperiment.from_lims(). Invalid ROIs will be included.
        load_from_nwb -- if True, loads dataset from the S3 platform cache NWB files. Invalid ROIs will not be included.
        get_extended_stimulus_presentations -- if True, adds an attribute "extended_stimulus_presentations" to the dataset object
        get_behavior_movie_timestamps -- if True, adds an attribute "behavior_movie_timestamps" to the dataset object
        If both load_from_lims and load_from_nwb are set to False, an exception will be raised

    Returns:
        object -- BehaviorOphysExperiment instance, with optional extra attributes attached
    """
    # NOTE(review): include_invalid_rois is accepted but not used in this function body -- confirm intent
    # sanity check that the provided id really is an ophys_experiment_id
    id_type = from_lims.get_id_type(ophys_experiment_id)
    if id_type != 'ophys_experiment_id':
        warnings.warn('It looks like you passed an id of type {} instead of an ophys_experiment_id'.format(id_type))
    assert id_type == 'ophys_experiment_id', "The passed ID type is {}. It must be an ophys_experiment_id".format(id_type)

    if load_from_lims:
        dataset = BehaviorOphysExperiment.from_lims(int(ophys_experiment_id))
    elif load_from_nwb:
        cache = bpc.from_s3_cache(cache_dir=get_platform_analysis_cache_dir())
        dataset = cache.get_behavior_ophys_experiment(ophys_experiment_id)
    else:
        raise Exception('Set load_from_lims or load_from_nwb to True')

    if get_extended_stimulus_presentations:
        # attach annotated stimulus_presentations (licks, rewards, rates, etc.)
        dataset.extended_stimulus_presentations = get_extended_stimulus_presentations_table(
            dataset.stimulus_presentations.copy(), dataset.licks, dataset.rewards,
            dataset.running_speed, dataset.eye_tracking)
    if get_behavior_movie_timestamps:
        # attach behavior monitoring camera timestamps from LIMS
        timestamps = utilities.get_timestamps(utilities.get_lims_data(ophys_experiment_id))
        dataset.behavior_movie_timestamps = timestamps['behavior_monitoring']['timestamps'].copy()
    return dataset
class BehaviorDataset(BehaviorSession):
"""
Loads SDK behavior session object and adds extended_stimulus_presentations and extended trials tables.
Returns:
BehaviorDataset {class} -- object with attributes & methods to access behavior data associated with a behavior_session_id
"""
    def __init__(self, api):
        """
        :param api: instance of an allenSDK BehaviorSession API object for one behavior_session_id
                    (passed straight through to the SDK BehaviorSession constructor)
        """
        super().__init__(api)
@property
def metadata(self):
metadata = super().metadata
self._metadata = metadata
return self._metadata
@property
def metadata_string(self):
# for figure titles & filenames
m = self.metadata
rig_name = m['equipment_name'].split('.')[0] + m['equipment_name'].split('.')[1]
self._metadata_string = str(m['mouse_id']) + '_' + str(m['behavior_session_id']) + '_' + m['driver_line'][
0] + '_' + m['session_type'] + '_' + rig_name
return self._metadata_string
@property
def extended_stimulus_presentations(self):
stimulus_presentations = self.stimulus_presentations.copy()
stimulus_presentations = reformat.add_change_each_flash(stimulus_presentations)
stimulus_presentations['pre_change'] = stimulus_presentations['change'].shift(-1)
stimulus_presentations['pre_omitted'] = stimulus_presentations['omitted'].shift(-1)
stimulus_presentations = reformat.add_epoch_times(stimulus_presentations)
stimulus_presentations = reformat.add_mean_running_speed(stimulus_presentations, self.running_speed)
stimulus_presentations = reformat.add_licks_each_flash(stimulus_presentations, self.licks)
stimulus_presentations = reformat.add_response_latency(stimulus_presentations)
stimulus_presentations = reformat.add_rewards_each_flash(stimulus_presentations, self.rewards)
stimulus_presentations['licked'] = [True if len(licks) > 0 else False for licks in
stimulus_presentations.licks.values]
stimulus_presentations['lick_rate'] = stimulus_presentations['licked'].rolling(window=320, min_periods=1,
win_type='triang').mean() / .75
stimulus_presentations['rewarded'] = [True if len(rewards) > 0 else False for rewards in
stimulus_presentations.rewards.values]
stimulus_presentations['reward_rate'] = stimulus_presentations['rewarded'].rolling(window=320, min_periods=1,
win_type='triang').mean()
stimulus_presentations = reformat.add_response_latency(stimulus_presentations)
# stimulus_presentations = reformat.add_image_contrast_to_stimulus_presentations(stimulus_presentations)
stimulus_presentations = reformat.add_time_from_last_lick(stimulus_presentations, self.licks)
stimulus_presentations = reformat.add_time_from_last_reward(stimulus_presentations, self.rewards)
stimulus_presentations = reformat.add_time_from_last_change(stimulus_presentations)
stimulus_presentations['flash_after_change'] = stimulus_presentations['change'].shift(1)
stimulus_presentations['image_name_next_flash'] = stimulus_presentations['image_name'].shift(-1)
stimulus_presentations['image_index_next_flash'] = stimulus_presentations['image_index'].shift(-1)
stimulus_presentations['image_name_previous_flash'] = stimulus_presentations['image_name'].shift(1)
stimulus_presentations['image_index_previous_flash'] = stimulus_presentations['image_index'].shift(1)
stimulus_presentations['lick_on_next_flash'] = stimulus_presentations['licked'].shift(-1)
stimulus_presentations['lick_rate_next_flash'] = stimulus_presentations['lick_rate'].shift(-1)
stimulus_presentations['lick_on_previous_flash'] = stimulus_presentations['licked'].shift(1)
stimulus_presentations['lick_rate_previous_flash'] = stimulus_presentations['lick_rate'].shift(1)
if check_if_model_output_available(self.metadata['behavior_session_id']):
stimulus_presentations = add_model_outputs_to_stimulus_presentations(
stimulus_presentations, self.metadata['behavior_session_id'])
else: