diff --git a/.gitignore b/.gitignore index 4d1496e..130f191 100644 --- a/.gitignore +++ b/.gitignore @@ -1,119 +1,74 @@ -# vscode -.vscode - -# User data -.DS_Store - # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class -# C extensions -*.so - -# Distribution / packaging +# Distribution, packaging, PyInstaller .Python env/ build/ -develop-eggs/ -dist/ +*egg*/ +*dist/ downloads/ -eggs/ -.eggs/ -lib/ -lib64/ +lib*/ parts/ -sdist/ var/ wheels/ -*.egg-info/ .installed.cfg *.egg -.idea/ - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt +pip-*.txt # Unit test / coverage reports htmlcov/ .tox/ -.coverage -.coverage.* +*.cov* .cache nosetests.xml coverage.xml -*.cover .hypothesis/ +.pytest_cache/ +docker-compose.y*ml -# Translations +# C extension, Translations +# editors: vscode, emacs, Mac +*.so *.mo *.pot +.vscode +**/*~ +**/#*# +**/.#* +.DS_Store -# Django stuff: +# Django, Flask, Scrapy, Sphinx, mkdocs +# PyBuilder, Jupyter, SageMath, celery beat *.log local_settings.py - -# Flask stuff: instance/ .webassets-cache - -# Scrapy stuff: .scrapy scratchpaper.* - -# Sphinx documentation docs/_build/ - -# PyBuilder +/site target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# pyenv -.python-version - -# celery beat schedule file +.*checkpoints celerybeat-schedule - -# SageMath parsed files *.sage.py -# dotenv -./.env - -# virtualenv -.venv +# dotenv, virtualenv, pyenv, mypy +.*env venv/ ENV/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy +.python-version .mypy_cache/ -# datajoint -dj_local_conf.json -dj_local_conf_old.json +# Spyder/Rope project settings +.spy*project +.ropeproject -# emacs -**/*~ -**/#*# -**/.#* -docker-compose.yml +# datajoint, notes, nwb export 
+dj_local_c*.json +temp* +*nwb diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..cdf4eb8 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,24 @@ +# Changelog + +Observes the [Semantic Versioning](https://semver.org/spec/v2.0.0.html) standard and the [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) convention. + + +## 0.0.0b2 +### Added ++ Integration tests + +### Changed ++ Name `workflow-animal` -> `workflow-session` + + +## [0.0.0b1] - 2021-03-24 +### Added ++ First beta release + + +## [0.0.0a1] - 2021-03-18 +### Added ++ Added notebooks + +[0.0.0b1]: https://github.com/datajoint/workflow-session/tree/f6d6a3353aae83ca13ff9fcc536017eb34c18f90 +[0.0.0a1]: https://github.com/datajoint/workflow-session/tree/0d3d3c970056ff4c243d17cf4f738f48268d80ad diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..350d3e5 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,3 @@ +# Contribution Guidelines + +This project follows the [DataJoint Contribution Guidelines](https://docs.datajoint.org/python/community/02-Contribute.html). Please refer to the link for full details. \ No newline at end of file diff --git a/Dockerfile.dev b/Dockerfile.dev new file mode 100644 index 0000000..9751737 --- /dev/null +++ b/Dockerfile.dev @@ -0,0 +1,28 @@ +FROM datajoint/djlab:py3.8-debian + +USER anaconda:anaconda + +COPY ./workflow-session/apt_requirements.txt /tmp/ +RUN /entrypoint.sh echo "Installed dependencies." 
+ +RUN mkdir /main/element-lab \ + /main/element-animal \ + /main/element-session \ + /main/workflow-session + +# Copy user's local fork of elements and workflow +COPY --chown=anaconda:anaconda ./element-lab /main/element-lab +COPY --chown=anaconda:anaconda ./element-animal /main/element-animal +COPY --chown=anaconda:anaconda ./element-session /main/element-session +COPY --chown=anaconda:anaconda ./workflow-session /main/workflow-session + +# Install packages +RUN pip install -e /main/element-lab +RUN pip install -e /main/element-animal +RUN pip install -e /main/element-session +RUN pip install -e /main/workflow-session +RUN pip install -r /main/workflow-session/requirements_test.txt + +WORKDIR /main/workflow-session + +ENTRYPOINT ["tail", "-f", "/dev/null"] diff --git a/Dockerfile.test b/Dockerfile.test new file mode 100644 index 0000000..5975f27 --- /dev/null +++ b/Dockerfile.test @@ -0,0 +1,34 @@ +FROM datajoint/djlab:py3.8-debian + +USER anaconda:anaconda + +COPY ./workflow-session/apt_requirements.txt /tmp/ +RUN /entrypoint.sh echo "Installed dependencies." 
+WORKDIR /main/workflow-session + +# Option 1 - Install DataJoint's remote fork of the workflow and elements +# RUN git clone https://github.com/datajoint/workflow-session.git /main/ + +# Option 2 - Install user's remote fork of element and workflow +# or an unreleased version of the element +# RUN pip install git+https://github.com//element-lab.git +# RUN pip install git+https://github.com//element-animal.git +# RUN pip install git+https://github.com//element-session.git +# RUN git clone https://github.com//workflow-session.git /main/ + +# Option 3 - Install user's local fork of element and workflow +RUN mkdir /main/element-lab +COPY --chown=anaconda:anaconda ./element-lab /main/element-lab +RUN pip install -e /main/element-lab +RUN mkdir /main/element-animal +COPY --chown=anaconda:anaconda ./element-animal /main/element-animal +RUN pip install -e /main/element-animal +RUN mkdir /main/element-session +COPY --chown=anaconda:anaconda ./element-session /main/element-session +RUN pip install -e /main/element-session +COPY --chown=anaconda:anaconda ./workflow-session /main/workflow-session +# RUN rm -f /main/workflow-session/dj_local_conf.json + +# Install the workflow +RUN pip install -e /main/workflow-session +RUN pip install -r /main/workflow-session/requirements_test.txt diff --git a/LICENSE b/LICENSE index a9f8903..2f92789 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2020 DataJoint NEURO +Copyright (c) 2022 DataJoint Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 72dcb12..39361af 100644 --- a/README.md +++ b/README.md @@ -1,177 +1,47 @@ -# Workflow for lab management and animal management +# Workflow for lab, subject and session management -This workflow builds a workflow to save the information of lab management and animal management, using the following datajoint elements +This 
directory provides an example workflow to save the information related to lab, subject, and session metadata management, using the following DataJoint elements + [element-lab](https://github.com/datajoint/element-lab) + [element-animal](https://github.com/datajoint/element-animal) ++ [element-session](https://github.com/datajoint/element-session) This repository provides demonstrations for: -Set up a workflow using different elements (see [workflow_animal/pipeline.py](workflow_animal/pipeline.py)) +Setting up a workflow using different elements (see [pipeline.py](workflow_session/pipeline.py)) ## Workflow architecture -The lab and animal management workflow presented here uses components from two DataJoint elements, element-lab, and element-animal assembled together to a functional workflow. +The lab and experiment subject management workflow presented here uses components from three DataJoint elements (element-lab, element-animal and element-session) assembled together into a functional workflow. ### element-lab -![lab](images/lab_diagram.svg) +![element-lab]( +https://github.com/datajoint/element-lab/raw/main/images/lab_diagram.svg) ### element-animal -element-animal contains two modules, `subject` and `genotyping`. +![element-animal]( +https://github.com/datajoint/element-animal/raw/main/images/subject_diagram.svg) -`subject` contains basic information of subjects. -![subject](images/subject_diagram.svg) +`genotyping` is an optional module designed for labs that handle animal care and genetic information themselves. +![genotyping](https://github.com/datajoint/element-animal/raw/main/images/genotyping_diagram.svg) +### element-session +`session` is designed to handle metadata related to data collection, including collection date-time, file paths, and notes. Most workflows will include element-session as a starting point for further data entry. 
+![session](https://github.com/datajoint/element-session/raw/main/images/session_diagram.svg) -`genotyping` is designed for labs that handle animal care and genotyping themselves, which is optional. -![genotyping](images/genotyping_diagram.svg) +### This workflow +This workflow serves as an example of the upstream part of a typical data workflow. For examples using these elements in tandem with other data collection modalities, refer to: -This workflow serves as an example of the upstream part of a typical data workflow, for examples using these two elements more intact workflows, refer to: - -+ [workflow-array-ephys]https://github.com/datajoint/workflow-array-ephys -+ [workflow-calcium-imaging]https://github.com/datajoint/workflow-calcium-imaging ++ [workflow-array-ephys](https://github.com/datajoint/workflow-array-ephys) ++ [workflow-calcium-imaging](https://github.com/datajoint/workflow-calcium-imaging) ## Installation instructions -### Step 1 - Clone this repository - -+ Launch a new terminal and change directory to where you want to clone the repository - ``` - cd C:/Projects - ``` -+ Clone the repository - ``` - git clone https://github.com/datajoint/workflow-animal - ``` -+ Change directory to `workflow-animal` - ``` - cd workflow-animal - ``` - -### Step 2 - Setup a virtual environment -It is highly recommended (though not strictly required) to create a virtual environment to run the pipeline. - -+ You can install with `virtualenv` or `conda`. Below are the commands for `virtualenv`. - -+ If `virtualenv` not yet installed, run `pip install --user virtualenv` - -+ To create a new virtual environment named `venv`: - ``` - virtualenv venv - ``` - -+ To activated the virtual environment: - + On Windows: - ``` - .\venv\Scripts\activate - ``` - - + On Linux/macOS: - ``` - source venv/bin/activate - ``` - -### Step 3 - Install this repository - -From the root of the cloned repository directory: - ``` - pip install -e . 
- ``` - -Note: the `-e` flag will install this repository in editable mode, -in case there's a need to modify the code (e.g. the `pipeline.py` or `paths.py` scripts). -If no such modification required, using `pip install .` is sufficient - - -### Step 4 - Jupyter Notebook -+ Register an IPython kernel with Jupyter - ``` - ipython kernel install --name=workflow-animal - ``` - -### Step 5 - Configure the `dj_local_conf.json` - -At the root of the repository folder, -create a new file `dj_local_conf.json` with the following template: - -```json -{ - "database.host": "", - "database.user": "", - "database.password": "", - "loglevel": "INFO", - "safemode": true, - "display.limit": 7, - "display.width": 14, - "display.show_tuple_count": true, - "custom": { - "database.prefix": "", -} -``` - -+ Specify database's `hostname`, `username`, and `password` properly. - -+ Specify a `database.prefix` to create the schemas. - - -### Installation complete - -+ At this point the setup of this workflow is complete. - - -## Interacting with the DataJoint pipeline and exploring data - -+ Connect to database and import tables - ``` - from workflow_animal.pipeline import * - ``` - This will create all tables defined in the elements in the database server. - -+ Preview the tables created by calling the classes, for example: - ``` - lab.Lab() - subject.Subject() - genotyping.GenotypingTest() - ``` - -+ If required to drop all schemas, the following is the dependency order. - ``` - from workflow_animal.pipeline import * - - genotyping.schema.drop() - subject.schema.drop() - lab.schema.drop() - ``` - -+ For a more in-depth exploration of the tables created, please refer to the example [notebook](notebooks/explore_workflow.ipynb). 
- - - -## Insert into Manual and Lookup tables with Graphical User Interface DataJoint Labbook - -DataJoint Neuro also provides a Graphical User Interface [DataJoint Labbook](https://github.com/datajoint/datajoint-labbook) to support manual data insertions into DataJoint workflows. - -![DataJoint Labbook preview](images/DataJoint_Labbook.png) - -Please refer to the [DataJoint Labbook page](https://github.com/datajoint/datajoint-labbook) for instructions to set it up. - -## Development mode installation ++ The installation instructions can be found in the [datajoint-elements repository]( + https://github.com/datajoint/datajoint-elements/blob/main/gh-pages/docs/install.md). -This method allows you to modify the source code for `workflow-calcium-imaging`, `element-calcium-imaging`, `element-animal`, and `element-lab`. +## Interacting with the DataJoint workflow -+ Launch a new terminal and change directory to where you want to clone the repositories - ``` - cd C:/Projects - ``` -+ Clone the repositories - ``` - git clone https://github.com/datajoint/element-lab - git clone https://github.com/datajoint/element-animal - git clone https://github.com/datajoint/workflow-animal - ``` -+ Install each package with the `-e` option - ``` - pip install -e ./workflow-animal - pip install -e ./element-lab - pip install -e ./element-animal - ``` ++ Please refer to the following workflow-specific +[Jupyter notebooks](/notebooks) for an in-depth explanation of how to run the +workflow ([1_Explore_Workflow.ipynb](notebooks/1_Explore_Workflow.ipynb)). 
diff --git a/apt_requirements.txt b/apt_requirements.txt new file mode 100644 index 0000000..2907e3f --- /dev/null +++ b/apt_requirements.txt @@ -0,0 +1,2 @@ +git +locales-all \ No newline at end of file diff --git a/docker-compose-dev.yaml b/docker-compose-dev.yaml new file mode 100644 index 0000000..3bcb2eb --- /dev/null +++ b/docker-compose-dev.yaml @@ -0,0 +1,31 @@ +# docker-compose -f docker-compose-dev.yaml up -d --build +# docker-compose -f docker-compose-dev.yaml down + +version: "2.4" +x-net: &net + networks: + - main +services: + db: + <<: *net + image: datajoint/mysql:5.7 + environment: + - MYSQL_ROOT_PASSWORD=simple + workflow: + <<: *net + build: + context: ../ + dockerfile: ./workflow-session/Dockerfile.dev + env_file: .env + image: workflow_session_dev:0.0.0b2 + volumes: + - ./apt_requirements.txt:/tmp/apt_requirements.txt + - ../element-lab:/main/element-lab + - ../element-animal:/main/element-animal + - ../element-session:/main/element-session + - .:/main/workflow-session + depends_on: + db: + condition: service_healthy +networks: + main: diff --git a/docker-compose-test.yaml b/docker-compose-test.yaml new file mode 100644 index 0000000..3c4b7f4 --- /dev/null +++ b/docker-compose-test.yaml @@ -0,0 +1,45 @@ +# export COMPOSE_DOCKER_CLI_BUILD=0 # some machines need for smooth --build +# docker-compose -f docker-compose-test.yaml up --build +# docker exec -it workflow-session_workflow_1 /bin/bash +# docker-compose -f docker-compose-test.yaml down + +version: "2.4" +x-net: &net + networks: + - main +services: + db: + <<: *net + image: datajoint/mysql:5.7 + environment: + - MYSQL_ROOT_PASSWORD=simple + workflow: + <<: *net + build: + context: ../ + dockerfile: ./workflow-session/Dockerfile.test + env_file: .env + image: workflow_session_test:0.0.0b2 + environment: + - DJ_HOST=db + - DJ_USER=root + - DJ_PASS=simple + - DATABASE_PREFIX=test_ + command: + - bash + - -c + - | + echo "------ INTEGRATION TESTS ------" + pytest -sv --cov-report term-missing 
--cov=workflow_session -p no:warnings + tail -f /dev/null + volumes: + - ./apt_requirements.txt:/tmp/apt_requirements.txt + - ../element-lab:/main/element-lab + - ../element-animal:/main/element-animal + - ../element-session:/main/element-session + - .:/main/workflow-session + depends_on: + db: + condition: service_healthy +networks: + main: diff --git a/images/DataJoint_Labbook.png b/images/DataJoint_Labbook.png deleted file mode 100644 index 0d96d1d..0000000 Binary files a/images/DataJoint_Labbook.png and /dev/null differ diff --git a/images/genotyping_diagram.svg b/images/genotyping_diagram.svg deleted file mode 100644 index a6d6471..0000000 --- a/images/genotyping_diagram.svg +++ /dev/null @@ -1,138 +0,0 @@ - - - - - -`genotyping`.`breeding_pair__father` - -`genotyping`.`breeding_pair__father` - - - -`genotyping`.`breeding_pair__mother` - -`genotyping`.`breeding_pair__mother` - - - -genotyping.AlleleSequence - - -genotyping.AlleleSequence - - - - - -genotyping.BreedingPair - - -genotyping.BreedingPair - - - - - -genotyping.BreedingPair->`genotyping`.`breeding_pair__father` - - - - -genotyping.BreedingPair->`genotyping`.`breeding_pair__mother` - - - - -genotyping.Litter - - -genotyping.Litter - - - - - -genotyping.BreedingPair->genotyping.Litter - - - - -genotyping.SubjectLitter - - -genotyping.SubjectLitter - - - - - -genotyping.Weaning - - -genotyping.Weaning - - - - - -genotyping.Litter->genotyping.SubjectLitter - - - - -genotyping.Litter->genotyping.Weaning - - - - -genotyping.SubjectCaging - - -genotyping.SubjectCaging - - - - - -genotyping.GenotypeTest - - -genotyping.GenotypeTest - - - - - -genotyping.Sequence - - -genotyping.Sequence - - - - - -genotyping.Sequence->genotyping.AlleleSequence - - - - -genotyping.Sequence->genotyping.GenotypeTest - - - - -genotyping.Cage - - -genotyping.Cage - - - - - -genotyping.Cage->genotyping.SubjectCaging - - - - \ No newline at end of file diff --git a/images/lab_diagram.svg b/images/lab_diagram.svg deleted file mode 
100644 index b954e4c..0000000 --- a/images/lab_diagram.svg +++ /dev/null @@ -1,130 +0,0 @@ - - - - - -Lab - - -Lab - - - - - -lab.LabMembership - - -lab.LabMembership - - - - - -Lab->lab.LabMembership - - - - -Location - - -Location - - - - - -Lab->Location - - - - -lab.Project - - -lab.Project - - - - - -lab.ProjectUser - - -lab.ProjectUser - - - - - -lab.Project->lab.ProjectUser - - - - -lab.UserRole - - -lab.UserRole - - - - - -lab.UserRole->lab.LabMembership - - - - -lab.ProtocolType - - -lab.ProtocolType - - - - - -Protocol - - -Protocol - - - - - -lab.ProtocolType->Protocol - - - - -Source - - -Source - - - - - -User - - -User - - - - - -User->lab.ProjectUser - - - - -User->lab.LabMembership - - - - \ No newline at end of file diff --git a/images/subject_diagram.svg b/images/subject_diagram.svg deleted file mode 100644 index ecb6c89..0000000 --- a/images/subject_diagram.svg +++ /dev/null @@ -1,191 +0,0 @@ - - - - - -`subject`.`subject__protocol` - -`subject`.`subject__protocol` - - - -`subject`.`#allele__source` - -`subject`.`#allele__source` - - - -`subject`.`subject__source` - -`subject`.`subject__source` - - - -`subject`.`#line__allele` - -`subject`.`#line__allele` - - - -`subject`.`subject__strain` - -`subject`.`subject__strain` - - - -`subject`.`subject__line` - -`subject`.`subject__line` - - - -`subject`.`subject__lab` - -`subject`.`subject__lab` - - - -`subject`.`subject__user` - -`subject`.`subject__user` - - - -subject.Subject - - -subject.Subject - - - - - -subject.Subject->`subject`.`subject__protocol` - - - - -subject.Subject->`subject`.`subject__source` - - - - -subject.Subject->`subject`.`subject__strain` - - - - -subject.Subject->`subject`.`subject__line` - - - - -subject.Subject->`subject`.`subject__lab` - - - - -subject.Subject->`subject`.`subject__user` - - - - -subject.SubjectDeath - - -subject.SubjectDeath - - - - - -subject.Subject->subject.SubjectDeath - - - - -subject.SubjectCullMethod - - -subject.SubjectCullMethod - - - - - 
-subject.Subject->subject.SubjectCullMethod - - - - -subject.Zygosity - - -subject.Zygosity - - - - - -subject.Subject->subject.Zygosity - - - - -subject.Strain - - -subject.Strain - - - - - -subject.Strain->`subject`.`subject__strain` - - - - -subject.Allele - - -subject.Allele - - - - - -subject.Allele->`subject`.`#allele__source` - - - - -subject.Allele->`subject`.`#line__allele` - - - - -subject.Allele->subject.Zygosity - - - - -subject.Line - - -subject.Line - - - - - -subject.Line->`subject`.`#line__allele` - - - - -subject.Line->`subject`.`subject__line` - - - - \ No newline at end of file diff --git a/notebooks/1_Explore_Workflow.ipynb b/notebooks/1_Explore_Workflow.ipynb new file mode 100644 index 0000000..cc3fba4 --- /dev/null +++ b/notebooks/1_Explore_Workflow.ipynb @@ -0,0 +1,1541 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "d26010d6-acbc-4c90-8b62-a2448c50452d", + "metadata": {}, + "source": [ + "# DataJoint U24 - Workflow Session" + ] + }, + { + "cell_type": "markdown", + "id": "c5ffe5d2-5b2a-45c3-8d8f-8c20efa8c5eb", + "metadata": {}, + "source": [ + "This notebook will describe the steps to explore the lab and animal management tables created by the elements.\n", + "Prior to using this notebook, please refer to the README for the installation instructions." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "4351c4bb-9763-4d4d-8558-37662adc930e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Connecting root@localhost:3306\n" + ] + }, + { + "data": { + "text/plain": [ + "DataJoint connection (connected) root@localhost:3306" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# change to the upper level folder to detect dj_local_conf.json\n", + "import os\n", + "if os.path.basename(os.getcwd())=='notebooks': os.chdir('..')\n", + "import datajoint as dj\n", + "dj.conn()" + ] + }, + { + "cell_type": "markdown", + "id": "ee820754-bceb-476a-acf9-238fa8b201d9", + "metadata": {}, + "source": [ + "Importing the module `workflow_session.pipeline` is sufficient to create tables inside the elements. This workflow comes prepackaged with example data and ingestion functions to populate lab, subject, and session tables." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "868b79bc-f754-4d51-a327-94a209cde374", + "metadata": {}, + "outputs": [], + "source": [ + "from element_lab import lab\n", + "from element_animal import subject\n", + "from element_session import session\n", + "from workflow_session.ingest import ingest_lab, ingest_subjects, ingest_sessions" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "9c211f0c-16fd-4d51-abf3-d67bbe271c26", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "---- Inserting 0 entry(s) into #lab ----\n", + "\n", + "---- Inserting 0 entry(s) into #location ----\n", + "\n", + "---- Inserting 0 entry(s) into #project ----\n", + "\n", + "---- Inserting 2 entry(s) into #project__source_code ----\n", + "\n", + "---- Inserting 2 entry(s) into #project__publication ----\n", + "\n", + "---- Inserting 3 entry(s) into #project__keywords ----\n", + "\n", + "---- Inserting 2 entry(s) into 
#protocol_type ----\n", + "\n", + "---- Inserting 2 entry(s) into #protocol ----\n", + "\n", + "---- Inserting 2 entry(s) into #user_role ----\n", + "\n", + "---- Inserting 3 entry(s) into #user ----\n", + "\n", + "---- Inserting 3 entry(s) into #lab_membership ----\n", + "\n", + "---- Inserting 4 entry(s) into project_user ----\n", + "\n", + "---- Inserting 2 entry(s) into subject ----\n", + "\n", + "---- Inserting 2 entry(s) into subject_death ----\n", + "\n", + "---- Inserting 2 entry(s) into subject_cull_method ----\n", + "\n", + "---- Inserting 2 entry(s) into session ----\n", + "\n", + "---- Inserting 2 entry(s) into session_directory ----\n", + "\n", + "---- Inserting 2 entry(s) into session_note ----\n" + ] + } + ], + "source": [ + "ingest_lab(); ingest_subjects();ingest_sessions()" + ] + }, + { + "cell_type": "markdown", + "id": "2e19116d-bc32-4cea-9caf-f3e8eaa9b181", + "metadata": {}, + "source": [ + "## Workflow architecture" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "1e7a0a8b-eaf1-41a1-bf08-1aff2f2812be", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "

lab

\n", + " Abbreviated lab name\n", + "
\n", + "

lab_name

\n", + " full lab name\n", + "
\n", + "

institution

\n", + " \n", + "
\n", + "

address

\n", + " \n", + "
\n", + "

time_zone

\n", + " \n", + "
LabAThe Example LabExample Uni'221B Baker StLondon NW1 6XE
LabBThe Other LabOther Uni'Oxford OX1 2JD United Kingdom'
\n", + " \n", + "

Total: 2

\n", + " " + ], + "text/plain": [ + "*lab lab_name institution address time_zone \n", + "+------+ +------------+ +------------+ +------------+ +------------+\n", + "LabA The Example La Example Uni '221B Baker St London NW1 6XE\n", + "LabB The Other Lab Other Uni 'Oxford OX1 2J United Kingdo\n", + " (Total: 2)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lab.Lab()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "63679df4-3064-402b-99ce-2f553dff877b", + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "lab.UserRole\n", + "\n", + "\n", + "lab.UserRole\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "lab.LabMembership\n", + "\n", + "\n", + "lab.LabMembership\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "lab.UserRole->lab.LabMembership\n", + "\n", + "\n", + "\n", + "\n", + "lab.Project.Keywords\n", + "\n", + "\n", + "lab.Project.Keywords\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "lab.Project\n", + "\n", + "\n", + "lab.Project\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "lab.Project->lab.Project.Keywords\n", + "\n", + "\n", + "\n", + "\n", + "lab.ProjectUser\n", + "\n", + "\n", + "lab.ProjectUser\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "lab.Project->lab.ProjectUser\n", + "\n", + "\n", + "\n", + "\n", + "lab.Project.Publication\n", + "\n", + "\n", + "lab.Project.Publication\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "lab.Project->lab.Project.Publication\n", + "\n", + "\n", + "\n", + "\n", + "lab.Project.SourceCode\n", + "\n", + "\n", + "lab.Project.SourceCode\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "lab.Project->lab.Project.SourceCode\n", + "\n", + "\n", + "\n", + "\n", + "lab.Location\n", + "\n", + "\n", + "lab.Location\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "lab.Protocol\n", + "\n", + "\n", + "lab.Protocol\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "lab.Source\n", + "\n", + "\n", + 
"lab.Source\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "lab.User\n", + "\n", + "\n", + "lab.User\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "lab.User->lab.ProjectUser\n", + "\n", + "\n", + "\n", + "\n", + "lab.User->lab.LabMembership\n", + "\n", + "\n", + "\n", + "\n", + "lab.Lab\n", + "\n", + "\n", + "lab.Lab\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "lab.Lab->lab.LabMembership\n", + "\n", + "\n", + "\n", + "\n", + "lab.Lab->lab.Location\n", + "\n", + "\n", + "\n", + "\n", + "lab.ProtocolType\n", + "\n", + "\n", + "lab.ProtocolType\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "lab.ProtocolType->lab.Protocol\n", + "\n", + "\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dj.Diagram(lab)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "8cf0f64b-e523-4a94-9a43-fca4ed793f82", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " Animal Subject\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "

subject

\n", + " \n", + "
\n", + "

sex

\n", + " \n", + "
\n", + "

subject_birth_date

\n", + " \n", + "
\n", + "

subject_description

\n", + " \n", + "
subject5F2020-01-01rich
subject6M2020-01-01manuel
\n", + " \n", + "

Total: 2

\n", + " " + ], + "text/plain": [ + "*subject sex subject_birth_ subject_descri\n", + "+----------+ +-----+ +------------+ +------------+\n", + "subject5 F 2020-01-01 rich \n", + "subject6 M 2020-01-01 manuel \n", + " (Total: 2)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "subject.Subject()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "75576be2-2984-451f-a86b-f05f9ddec6b7", + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "subject.Subject.Strain\n", + "\n", + "\n", + "subject.Subject.Strain\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "subject.Strain\n", + "\n", + "\n", + "subject.Strain\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "subject.Strain->subject.Subject.Strain\n", + "\n", + "\n", + "\n", + "\n", + "subject.Subject.Protocol\n", + "\n", + "\n", + "subject.Subject.Protocol\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "subject.Subject.Line\n", + "\n", + "\n", + "subject.Subject.Line\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "subject.Line.Allele\n", + "\n", + "\n", + "subject.Line.Allele\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "subject.Allele\n", + "\n", + "\n", + "subject.Allele\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "subject.Allele->subject.Line.Allele\n", + "\n", + "\n", + "\n", + "\n", + "subject.Allele.Source\n", + "\n", + "\n", + "subject.Allele.Source\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "subject.Allele->subject.Allele.Source\n", + "\n", + "\n", + "\n", + "\n", + "subject.Zygosity\n", + "\n", + "\n", + "subject.Zygosity\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "subject.Allele->subject.Zygosity\n", + "\n", + "\n", + "\n", + "\n", + "subject.Subject\n", + "\n", + "\n", + "subject.Subject\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "subject.Subject->subject.Subject.Strain\n", + "\n", + "\n", + "\n", + "\n", + "subject.Subject->subject.Subject.Protocol\n", + "\n", + 
"\n", + "\n", + "\n", + "subject.Subject->subject.Subject.Line\n", + "\n", + "\n", + "\n", + "\n", + "subject.Subject.Lab\n", + "\n", + "\n", + "subject.Subject.Lab\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "subject.Subject->subject.Subject.Lab\n", + "\n", + "\n", + "\n", + "\n", + "subject.Subject.Source\n", + "\n", + "\n", + "subject.Subject.Source\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "subject.Subject->subject.Subject.Source\n", + "\n", + "\n", + "\n", + "\n", + "subject.Subject.User\n", + "\n", + "\n", + "subject.Subject.User\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "subject.Subject->subject.Subject.User\n", + "\n", + "\n", + "\n", + "\n", + "subject.SubjectDeath\n", + "\n", + "\n", + "subject.SubjectDeath\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "subject.Subject->subject.SubjectDeath\n", + "\n", + "\n", + "\n", + "\n", + "subject.SubjectCullMethod\n", + "\n", + "\n", + "subject.SubjectCullMethod\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "subject.Subject->subject.SubjectCullMethod\n", + "\n", + "\n", + "\n", + "\n", + "subject.Subject->subject.Zygosity\n", + "\n", + "\n", + "\n", + "\n", + "subject.Line\n", + "\n", + "\n", + "subject.Line\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "subject.Line->subject.Subject.Line\n", + "\n", + "\n", + "\n", + "\n", + "subject.Line->subject.Line.Allele\n", + "\n", + "\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dj.Diagram(subject)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "5243a782-93da-40fa-b243-03ddcb230c1d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "
\n", + "

subject

\n", + " \n", + "
\n", + "

session_datetime

\n", + " \n", + "
subject52020-04-15 11:16:38
subject62021-06-02 14:04:22
\n", + " \n", + "

Total: 2

\n", + " " + ], + "text/plain": [ + "*subject *session_datet\n", + "+----------+ +------------+\n", + "subject5 2020-04-15 11:\n", + "subject6 2021-06-02 14:\n", + " (Total: 2)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "session.Session()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "7e48d7c0-b7bd-4f0b-abcb-1aedc69d5310", + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "session.ProjectSession\n", + "\n", + "\n", + "session.ProjectSession\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "session.SessionExperimenter\n", + "\n", + "\n", + "session.SessionExperimenter\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "session.Session\n", + "\n", + "\n", + "session.Session\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "session.Session->session.ProjectSession\n", + "\n", + "\n", + "\n", + "\n", + "session.Session->session.SessionExperimenter\n", + "\n", + "\n", + "\n", + "\n", + "session.SessionNote\n", + "\n", + "\n", + "session.SessionNote\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "session.Session->session.SessionNote\n", + "\n", + "\n", + "\n", + "\n", + "session.SessionDirectory\n", + "\n", + "\n", + "session.SessionDirectory\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "session.Session->session.SessionDirectory\n", + "\n", + "\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dj.Diagram(session)" + ] + }, + { + "cell_type": "markdown", + "id": "c510fe4d-09ed-472f-830f-4401bd6830d0", + "metadata": {}, + "source": [ + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "1984077d-a9f2-4b17-8034-ee2af8f105f4", + "metadata": {}, + "outputs": [], + "source": [ + "# dj.Diagram(genotyping) + dj.Diagram(subject.Subject) + dj.Diagram(subject.Allele)" + ] + }, + { + "cell_type": "markdown", + "id": 
"b60f5f4c-d366-4034-a40d-2d2095cb2a14", + "metadata": {}, + "source": [ + "## Explore each table" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "9c0821e1-9125-4c41-bc9c-567f53d0a5e5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# Animal Subject\n", + "subject : varchar(8) \n", + "---\n", + "sex : enum('M','F','U') \n", + "subject_birth_date : date \n", + "subject_description=\"\" : varchar(1024) \n", + "\n" + ] + }, + { + "data": { + "text/plain": [ + "'# Animal Subject\\nsubject : varchar(8) \\n---\\nsex : enum(\\'M\\',\\'F\\',\\'U\\') \\nsubject_birth_date : date \\nsubject_description=\"\" : varchar(1024) \\n'" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# check table definition with describe()\n", + "subject.Subject.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "504d55ed-25bd-4bb1-bce9-c516bd3595df", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-> subject.Subject\n", + "-> subject.Allele\n", + "---\n", + "zygosity : enum('Present','Absent','Homozygous','Heterozygous') \n", + "\n" + ] + }, + { + "data": { + "text/plain": [ + "\"-> subject.Subject\\n-> subject.Allele\\n---\\nzygosity : enum('Present','Absent','Homozygous','Heterozygous') \\n\"" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# check table definition with dependencies with describe()\n", + "subject.Zygosity.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "1edded31-bdef-42fb-8f58-b42b9b186cf1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "# \n", + "subject : varchar(8) # \n", + "allele : varchar(32) # abbreviated allele name\n", + "---\n", + "zygosity : enum('Present','Absent','Homozygous','Heterozygous') # " + ] + }, + "execution_count": 15, + 
"metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# check the name of every attribute with heading, \n", + "# which will spell out the foreign key definition inherited from another table\n", + "subject.Zygosity.heading" + ] + }, + { + "cell_type": "markdown", + "id": "f6c110c0-0966-4283-a0ba-a7de2ce69e25", + "metadata": {}, + "source": [ + "## Insert data into Manual and Lookup tables" + ] + }, + { + "cell_type": "markdown", + "id": "54cf050e-882e-4672-be31-1ca3df52fa58", + "metadata": {}, + "source": [ + "Tables in this workflow are either manual tables or lookup tables. To insert into these tables, DataJoint provide method `.insert1()` and `insert()`." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "d5b43904-9711-4bce-8ae5-d0d797118dec", + "metadata": {}, + "outputs": [], + "source": [ + "subject.Subject.insert1(\n", + " dict(subject='subject1', sex='M', subject_birth_date='2020-12-30', \n", + " subject_description='test animal'), skip_duplicates=True)\n", + "subject.Subject.insert1(\n", + " ('subject2', 'F', '2020-11-30', 'test animal'), skip_duplicates=True)" + ] + }, + { + "cell_type": "markdown", + "id": "49d43ca2-2cd3-4659-849f-5bcc09c1367e", + "metadata": {}, + "source": [ + "`skip_duplicates=True` will prevent an error if you already have data for the primary keys in a given entry." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "9bf2c953-7b4c-4a70-99fd-124a4d28171b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " Animal Subject\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "

subject

\n", + " \n", + "
\n", + "

sex

\n", + " \n", + "
\n", + "

subject_birth_date

\n", + " \n", + "
\n", + "

subject_description

\n", + " \n", + "
subject1M2020-12-30test animal
subject2F2020-11-30test animal
subject5F2020-01-01rich
subject6M2020-01-01manuel
\n", + " \n", + "

Total: 4

\n", + " " + ], + "text/plain": [ + "*subject sex subject_birth_ subject_descri\n", + "+----------+ +-----+ +------------+ +------------+\n", + "subject1 M 2020-12-30 test animal \n", + "subject2 F 2020-11-30 test animal \n", + "subject5 F 2020-01-01 rich \n", + "subject6 M 2020-01-01 manuel \n", + " (Total: 4)" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "subject.Subject()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "7a10ddab-d0fd-45a0-8183-09c1b1933e0a", + "metadata": {}, + "outputs": [], + "source": [ + "# `insert()` takes a list of dicts or tuples\n", + "subject.Subject.insert(\n", + " [dict(subject='subject3', sex='F', subject_birth_date='2020-12-30', \n", + " subject_description='test animal'),\n", + " dict(subject='subject4', sex='M', subject_birth_date='2021-02-12', \n", + " subject_description='test animal')\n", + " ],\n", + " skip_duplicates=True)\n", + "subject.Subject.insert(\n", + " [\n", + " ('subject7', 'U', '2020-08-30', 'test animal'),\n", + " ('subject8', 'F', '2020-09-30', 'test animal')\n", + " ],\n", + " skip_duplicates=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "064ddaae-3410-47fc-be22-671d2afe7fb6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " Animal Subject\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "

subject

\n", + " \n", + "
\n", + "

sex

\n", + " \n", + "
\n", + "

subject_birth_date

\n", + " \n", + "
\n", + "

subject_description

\n", + " \n", + "
subject1M2020-12-30test animal
subject2F2020-11-30test animal
subject3F2020-12-30test animal
subject4M2021-02-12test animal
subject5F2020-01-01rich
subject6M2020-01-01manuel
subject7U2020-08-30test animal
subject8F2020-09-30test animal
\n", + " \n", + "

Total: 8

\n", + " " + ], + "text/plain": [ + "*subject sex subject_birth_ subject_descri\n", + "+----------+ +-----+ +------------+ +------------+\n", + "subject1 M 2020-12-30 test animal \n", + "subject2 F 2020-11-30 test animal \n", + "subject3 F 2020-12-30 test animal \n", + "subject4 M 2021-02-12 test animal \n", + "subject5 F 2020-01-01 rich \n", + "subject6 M 2020-01-01 manuel \n", + "subject7 U 2020-08-30 test animal \n", + "subject8 F 2020-09-30 test animal \n", + " (Total: 8)" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "subject.Subject()" + ] + }, + { + "cell_type": "markdown", + "id": "c47691a0-b016-4092-a5ad-fefff93c54dd", + "metadata": {}, + "source": [ + "For more documentation of insert, please refer to [DataJoint Docs](https://docs.datajoint.org/python/manipulation/1-Insert.html) and [DataJoint CodeBook](https://codebook.datajoint.io/)" + ] + }, + { + "cell_type": "markdown", + "id": "13f8a8ed-2656-46d8-82ba-cdf130c4873e", + "metadata": {}, + "source": [ + "## Insert into Manual and Lookup tables with a Graphical User Interface" + ] + }, + { + "cell_type": "markdown", + "id": "4775dd80-8a54-47b7-a9ba-99995db9ff1a", + "metadata": {}, + "source": [ + "DataJoint also provides a graphical user interface ([DataJoint LabBook](https://github.com/datajoint/datajoint-labbook)) to support manual data insertions into DataJoint workflows. 
![DataJoint LabBook preview](https://github.com/datajoint/datajoint-labbook/blob/master/docs/sphinx/_static/images/walkthroughDemoOptimized.gif)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv-nwb", + "language": "python", + "name": "venv-nwb" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/explore_workflow.ipynb b/notebooks/explore_workflow.ipynb deleted file mode 100644 index dc736a6..0000000 --- a/notebooks/explore_workflow.ipynb +++ /dev/null @@ -1,1252 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# DataJoint U24 Workflow Animal" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This notebook will describe the steps to explore the lab and animal management tables created by the elements. \n", - "Prior to using this notebook, please refer to the README for the installation instructions." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Connecting shan@localhost:3306\n" - ] - } - ], - "source": [ - "# change to the upper level folder to detect dj_local_conf.json\n", - "import os\n", - "os.chdir('..')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Importing the module `workflow_animal.pipeline` is sufficient to create tables inside the elements" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "from workflow_animal.pipeline import lab, subject, genotyping" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Workflow architecture" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "Lab\n", - "\n", - "\n", - "Lab\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "lab.LabMembership\n", - "\n", - "\n", - "lab.LabMembership\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "Lab->lab.LabMembership\n", - "\n", - "\n", - "\n", - "\n", - "Location\n", - "\n", - "\n", - "Location\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "Lab->Location\n", - "\n", - "\n", - "\n", - "\n", - "lab.Project\n", - "\n", - "\n", - "lab.Project\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "lab.ProjectUser\n", - "\n", - "\n", - "lab.ProjectUser\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "lab.Project->lab.ProjectUser\n", - "\n", - "\n", - "\n", - "\n", - "lab.UserRole\n", - "\n", - "\n", - "lab.UserRole\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "lab.UserRole->lab.LabMembership\n", - "\n", - "\n", - "\n", - "\n", - "lab.ProtocolType\n", - "\n", - "\n", - "lab.ProtocolType\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "Protocol\n", - "\n", - "\n", - "Protocol\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "lab.ProtocolType->Protocol\n", - "\n", 
- "\n", - "\n", - "\n", - "Source\n", - "\n", - "\n", - "Source\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "User\n", - "\n", - "\n", - "User\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "User->lab.ProjectUser\n", - "\n", - "\n", - "\n", - "\n", - "User->lab.LabMembership\n", - "\n", - "\n", - "\n", - "" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dj.Diagram(lab)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "`subject`.`subject__protocol`\n", - "\n", - "`subject`.`subject__protocol`\n", - "\n", - "\n", - "\n", - "`subject`.`#allele__source`\n", - "\n", - "`subject`.`#allele__source`\n", - "\n", - "\n", - "\n", - "`subject`.`subject__source`\n", - "\n", - "`subject`.`subject__source`\n", - "\n", - "\n", - "\n", - "`subject`.`#line__allele`\n", - "\n", - "`subject`.`#line__allele`\n", - "\n", - "\n", - "\n", - "`subject`.`subject__strain`\n", - "\n", - "`subject`.`subject__strain`\n", - "\n", - "\n", - "\n", - "`subject`.`subject__line`\n", - "\n", - "`subject`.`subject__line`\n", - "\n", - "\n", - "\n", - "`subject`.`subject__lab`\n", - "\n", - "`subject`.`subject__lab`\n", - "\n", - "\n", - "\n", - "`subject`.`subject__user`\n", - "\n", - "`subject`.`subject__user`\n", - "\n", - "\n", - "\n", - "subject.Subject\n", - "\n", - "\n", - "subject.Subject\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "subject.Subject->`subject`.`subject__protocol`\n", - "\n", - "\n", - "\n", - "\n", - "subject.Subject->`subject`.`subject__source`\n", - "\n", - "\n", - "\n", - "\n", - "subject.Subject->`subject`.`subject__strain`\n", - "\n", - "\n", - "\n", - "\n", - "subject.Subject->`subject`.`subject__line`\n", - "\n", - "\n", - "\n", - "\n", - "subject.Subject->`subject`.`subject__lab`\n", - "\n", - "\n", - "\n", - "\n", - 
"subject.Subject->`subject`.`subject__user`\n", - "\n", - "\n", - "\n", - "\n", - "subject.SubjectDeath\n", - "\n", - "\n", - "subject.SubjectDeath\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "subject.Subject->subject.SubjectDeath\n", - "\n", - "\n", - "\n", - "\n", - "subject.SubjectCullMethod\n", - "\n", - "\n", - "subject.SubjectCullMethod\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "subject.Subject->subject.SubjectCullMethod\n", - "\n", - "\n", - "\n", - "\n", - "subject.Zygosity\n", - "\n", - "\n", - "subject.Zygosity\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "subject.Subject->subject.Zygosity\n", - "\n", - "\n", - "\n", - "\n", - "subject.Strain\n", - "\n", - "\n", - "subject.Strain\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "subject.Strain->`subject`.`subject__strain`\n", - "\n", - "\n", - "\n", - "\n", - "subject.Allele\n", - "\n", - "\n", - "subject.Allele\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "subject.Allele->`subject`.`#allele__source`\n", - "\n", - "\n", - "\n", - "\n", - "subject.Allele->`subject`.`#line__allele`\n", - "\n", - "\n", - "\n", - "\n", - "subject.Allele->subject.Zygosity\n", - "\n", - "\n", - "\n", - "\n", - "subject.Line\n", - "\n", - "\n", - "subject.Line\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "subject.Line->`subject`.`#line__allele`\n", - "\n", - "\n", - "\n", - "\n", - "subject.Line->`subject`.`subject__line`\n", - "\n", - "\n", - "\n", - "" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dj.Diagram(subject)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "`genotyping`.`breeding_pair__father`\n", - "\n", - "`genotyping`.`breeding_pair__father`\n", - "\n", - "\n", - "\n", - "`genotyping`.`breeding_pair__mother`\n", - "\n", - "`genotyping`.`breeding_pair__mother`\n", - "\n", - "\n", - "\n", - 
"subject.Subject\n", - "\n", - "\n", - "subject.Subject\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "subject.Subject->`genotyping`.`breeding_pair__father`\n", - "\n", - "\n", - "\n", - "\n", - "subject.Subject->`genotyping`.`breeding_pair__mother`\n", - "\n", - "\n", - "\n", - "\n", - "genotyping.SubjectLitter\n", - "\n", - "\n", - "genotyping.SubjectLitter\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "subject.Subject->genotyping.SubjectLitter\n", - "\n", - "\n", - "\n", - "\n", - "genotyping.SubjectCaging\n", - "\n", - "\n", - "genotyping.SubjectCaging\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "subject.Subject->genotyping.SubjectCaging\n", - "\n", - "\n", - "\n", - "\n", - "genotyping.GenotypeTest\n", - "\n", - "\n", - "genotyping.GenotypeTest\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "subject.Subject->genotyping.GenotypeTest\n", - "\n", - "\n", - "\n", - "\n", - "genotyping.BreedingPair\n", - "\n", - "\n", - "genotyping.BreedingPair\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "genotyping.BreedingPair->`genotyping`.`breeding_pair__father`\n", - "\n", - "\n", - "\n", - "\n", - "genotyping.BreedingPair->`genotyping`.`breeding_pair__mother`\n", - "\n", - "\n", - "\n", - "\n", - "genotyping.Litter\n", - "\n", - "\n", - "genotyping.Litter\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "genotyping.BreedingPair->genotyping.Litter\n", - "\n", - "\n", - "\n", - "\n", - "genotyping.AlleleSequence\n", - "\n", - "\n", - "genotyping.AlleleSequence\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "genotyping.Weaning\n", - "\n", - "\n", - "genotyping.Weaning\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "genotyping.Litter->genotyping.SubjectLitter\n", - "\n", - "\n", - "\n", - "\n", - "genotyping.Litter->genotyping.Weaning\n", - "\n", - "\n", - "\n", - "\n", - "subject.Allele\n", - "\n", - "\n", - "subject.Allele\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "subject.Allele->genotyping.AlleleSequence\n", - "\n", - "\n", - "\n", - "\n", - "genotyping.Sequence\n", - "\n", 
- "\n", - "genotyping.Sequence\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "genotyping.Sequence->genotyping.AlleleSequence\n", - "\n", - "\n", - "\n", - "\n", - "genotyping.Sequence->genotyping.GenotypeTest\n", - "\n", - "\n", - "\n", - "\n", - "genotyping.Cage\n", - "\n", - "\n", - "genotyping.Cage\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "genotyping.Cage->genotyping.SubjectCaging\n", - "\n", - "\n", - "\n", - "" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dj.Diagram(genotyping) + dj.Diagram(subject.Subject) + dj.Diagram(subject.Allele)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Explore each table" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "DataJoint provide tools to explore table definitions and table contents." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "# Animal Subject\n", - "subject : varchar(32) \n", - "---\n", - "sex : enum('M','F','U') \n", - "subject_birth_date : date \n", - "subject_description=\"\" : varchar(1024) \n", - "\n" - ] - } - ], - "source": [ - "# check table definition with describe()\n", - "subject.Subject.describe();" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-> subject.Subject\n", - "-> subject.Allele\n", - "---\n", - "zygosity : enum('Present','Absent','Homozygous','Heterozygous') # zygosity\n", - "\n" - ] - } - ], - "source": [ - "# check table definition with dependencies with describe()\n", - "subject.Zygosity.describe();" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "# \n", - "subject : varchar(32) # \n", - "allele : varchar(32) # abbreviated allele 
name\n", - "---\n", - "zygosity : enum('Present','Absent','Homozygous','Heterozygous') # zygosity" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# check the name of every attribute with heading, \n", - "# which will spell out the foreign key definition inherited from another table\n", - "subject.Zygosity.heading" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " Animal Subject\n", - "
\n", - " \n", - " \n", - " \n", - "
\n", - "

subject

\n", - " \n", - "
\n", - "

sex

\n", - " \n", - "
\n", - "

subject_birth_date

\n", - " \n", - "
\n", - "

subject_description

\n", - " \n", - "
\n", - " \n", - "

Total: 0

\n", - " " - ], - "text/plain": [ - "*subject sex subject_birth_ subject_descri\n", - "+---------+ +-----+ +------------+ +------------+\n", - "\n", - " (Total: 0)" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# check table contents\n", - "subject.Subject()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Insert data into Manual and Lookup tables" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Tables in this workflow are either manual tables or lookup tables. To insert into these tables, DataJoint provide method `.insert1()` and `insert()`." - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [], - "source": [ - "# `insert1()` takes a dict or a tuple\n", - "subject.Subject.insert1(\n", - " dict(subject='subject1', sex='M', subject_birth_date='2020-12-30', \n", - " subject_description='test animal'))\n", - "subject.Subject.insert1(\n", - " ('subject2', 'F', '2020-11-30', 'test animal'))" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " Animal Subject\n", - "
\n", - " \n", - " \n", - " \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
\n", - "

subject

\n", - " \n", - "
\n", - "

sex

\n", - " \n", - "
\n", - "

subject_birth_date

\n", - " \n", - "
\n", - "

subject_description

\n", - " \n", - "
subject1M2020-12-30test animal
subject2F2020-11-30test animal
\n", - " \n", - "

Total: 2

\n", - " " - ], - "text/plain": [ - "*subject sex subject_birth_ subject_descri\n", - "+----------+ +-----+ +------------+ +------------+\n", - "subject1 M 2020-12-30 test animal \n", - "subject2 F 2020-11-30 test animal \n", - " (Total: 2)" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "subject.Subject()" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [], - "source": [ - "# `insert()` takes a list of dicts or tuples\n", - "subject.Subject.insert(\n", - " [dict(subject='subject3', sex='F', subject_birth_date='2020-12-30', \n", - " subject_description='test animal'),\n", - " dict(subject='subject4', sex='M', subject_birth_date='2021-02-12', \n", - " subject_description='test animal'),\n", - " dict(subject='subject5', sex='U', subject_birth_date='2020-12-30', \n", - " subject_description='test animal'),\n", - " ]\n", - ")\n", - "subject.Subject.insert(\n", - " [\n", - " ('subject6', 'M', '2020-07-30', 'test animal'),\n", - " ('subject7', 'U', '2020-08-30', 'test animal'),\n", - " ('subject8', 'F', '2020-09-30', 'test animal')\n", - " ]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " Animal Subject\n", - "
\n", - " \n", - " \n", - " \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
\n", - "

subject

\n", - " \n", - "
\n", - "

sex

\n", - " \n", - "
\n", - "

subject_birth_date

\n", - " \n", - "
\n", - "

subject_description

\n", - " \n", - "
subject1M2020-12-30test animal
subject2F2020-11-30test animal
subject3F2020-12-30test animal
subject4M2021-02-12test animal
subject5U2020-12-30test animal
subject6M2020-07-30test animal
subject7U2020-08-30test animal
subject8F2020-09-30test animal
\n", - " \n", - "

Total: 8

\n", - " " - ], - "text/plain": [ - "*subject sex subject_birth_ subject_descri\n", - "+----------+ +-----+ +------------+ +------------+\n", - "subject1 M 2020-12-30 test animal \n", - "subject2 F 2020-11-30 test animal \n", - "subject3 F 2020-12-30 test animal \n", - "subject4 M 2021-02-12 test animal \n", - "subject5 U 2020-12-30 test animal \n", - "subject6 M 2020-07-30 test animal \n", - "subject7 U 2020-08-30 test animal \n", - "subject8 F 2020-09-30 test animal \n", - " (Total: 8)" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "subject.Subject()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For more documentation of insert, please refer to [DataJoint Docs](https://docs.datajoint.io/python/manipulation/1-Insert.html) and [DataJoint playground](https://playground.datajoint.io/)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Insert into Manual and Lookup tables with Graphical User Interface" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "DataJoint Neuro also provides a Graphical User Interface [DataJoint Labbook](https://github.com/datajoint/datajoint-labbook) to support manual data insertions into DataJoint workflows.\n", - " \n", - "![DataJoint Labbook preview](../images/DataJoint_Labbook.png)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "workflow-imaging", - "language": "python", - "name": "workflow-imaging" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.0" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/requirements.txt b/requirements.txt index 6455282..8cf9a5d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,6 @@ datajoint>=0.13.0 -element-lab 
-element-animal +element-lab==0.1.0b1 +element-animal==0.1.0b0 +element-session==0.1.0b0 ipykernel +pynwb==1.4.0 diff --git a/requirements_test.txt b/requirements_test.txt new file mode 100644 index 0000000..e76c2da --- /dev/null +++ b/requirements_test.txt @@ -0,0 +1,3 @@ +pytest +pytest-cov +djarchive-client @ git+https://github.com/datajoint/djarchive-client.git \ No newline at end of file diff --git a/setup.py b/setup.py index 1c10e13..bbdb2fa 100644 --- a/setup.py +++ b/setup.py @@ -1,31 +1,34 @@ -#!/usr/bin/env python from setuptools import setup, find_packages from os import path -import sys +pkg_name = 'workflow_session' here = path.abspath(path.dirname(__file__)) long_description = """" -# Workflow for lab management and animal management +# Workflow for lab, animal, and session management Build a workflow for lab management and animal metadata using DataJoint Elements -+ [elements-lab](https://github.com/datajoint/elements-lab) -+ [elements-animal](https://github.com/datajoint/elements-animal) ++ [element-lab](https://github.com/datajoint/element-lab) ++ [element-animal](https://github.com/datajoint/element-animal) ++ [element-session](https://github.com/datajoint/element-session) """ with open(path.join(here, 'requirements.txt')) as f: requirements = f.read().splitlines() +with open(path.join(here, pkg_name, 'version.py')) as f: + exec(f.read()) + setup( - name='workflow-animal', - version='0.0.1', - description="DataJoint Elements for Animal Management", + name='workflow-session', + version=__version__, + description="DataJoint Elements for Lab, Animal and Session Management", long_description=long_description, - author='DataJoint NEURO', - author_email='info@vathes.com', + author='DataJoint', + author_email='info@datajoint.com', license='MIT', - url='https://github.com/datajoint/workflow-animal', - keywords='neuroscience lab-management animal-management datajoint', + url='https://github.com/datajoint/workflow-session', + keywords='neuroscience 
lab-management animal-management session datajoint', packages=find_packages(exclude=['contrib', 'docs', 'tests*']), install_requires=requirements, ) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..c9e0193 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1,297 @@ +''' +run all tests: + pytest -sv --cov-report term-missing --cov=workflow_session -p no:warnings tests/ +run one test, debug: + pytest [above options] --pdb tests/tests_name.py -k function_name +''' + +import os +import sys +import pytest +import pathlib +import datajoint as dj + +# ------------------- SOME CONSTANTS ------------------- + +_tear_down = True +verbose = False + +pathlib.Path('./tests/user_data').mkdir(exist_ok=True) +pathlib.Path('./tests/user_data/lab').mkdir(exist_ok=True) +pathlib.Path('./tests/user_data/session').mkdir(exist_ok=True) +pathlib.Path('./tests/user_data/subject').mkdir(exist_ok=True) + +# ------------------ GENERAL FUNCTIONS ------------------ + + +def write_csv(content, path): + """ + General function for writing strings to lines in CSV + :param path: pathlib PosixPath + :param content: list of strings, each as row of CSV + """ + with open(path, 'w') as f: + for line in content: + f.write(line+'\n') + + +class QuietStdOut: + """If verbose set to false, used to quiet tear_down table.delete prints""" + def __enter__(self): + self._original_stdout = sys.stdout + sys.stdout = open(os.devnull, 'w') + + def __exit__(self, exc_type, exc_val, exc_tb): + sys.stdout.close() + sys.stdout = self._original_stdout + +# ---------------------- FIXTURES ---------------------- + + +@pytest.fixture(autouse=True) +def dj_config(): + """ If dj_local_config exists, load""" + if pathlib.Path('./dj_local_conf.json').exists(): + dj.config.load('./dj_local_conf.json') + dj.config['safemode'] = False + dj.config['database.host'] = (os.environ.get('DJ_HOST') + or dj.config['database.host']) + dj.config['database.password'] = (os.environ.get('DJ_PASS') + or 
dj.config['database.password']) + dj.config['database.user'] = (os.environ.get('DJ_USER') + or dj.config['database.user']) + dj.config['custom'] = { + 'database.prefix': (os.environ.get('DATABASE_PREFIX') + or dj.config['custom']['database.prefix'])} + return + + +@pytest.fixture +def pipeline(): + """ Loads workflow_session.pipeline lab, session, subject""" + from workflow_session import pipeline + + yield {'subject': pipeline.subject, + 'session': pipeline.session, + 'lab': pipeline.lab} + + if _tear_down: + if verbose: + pipeline.subject.Subject.delete() + pipeline.session.Session.delete() + pipeline.lab.Lab.delete() + else: + with QuietStdOut(): + pipeline.subject.Subject.delete() + pipeline.session.Session.delete() + pipeline.lab.Lab.delete() + + +@pytest.fixture +def lab_csv(): + """ Create a 'labs.csv' file""" + lab_content = ["lab,lab_name,institution,address," + + "time_zone,location,location_description", + "LabA,The Example Lab,Example Uni," + + "'221B Baker St,London NW1 6XE,UK',UTC+0," + + "Example Building,'2nd floor lab dedicated to all " + + "fictional experiments.'", + "LabB,The Other Lab,Other Uni," + + "'Oxford OX1 2JD, United Kingdom',UTC+0," + + "Other Building,'fictional campus dedicated to imaginary" + + "experiments.'"] + lab_csv_path = pathlib.Path('./tests/user_data/lab/labs.csv') + write_csv(lab_content, lab_csv_path) + + yield lab_content, lab_csv_path + lab_csv_path.unlink() + + +@pytest.fixture +def lab_project_csv(): + """ Create a 'projects.csv' file""" + lab_project_content = ["project,project_description,repository_url," + + "repository_name,codeurl", + "ProjA,Example project to populate element-lab," + + "https://github.com/datajoint/element-lab/," + + "element-lab,https://github.com/datajoint/element" + + "-lab/tree/main/element_lab", + "ProjB,Other example project to populate element-" + + "lab,https://github.com/datajoint/element-session" + + "/,element-session,https://github.com/datajoint/" + + 
"element-session/tree/main/element_session"] + lab_project_csv_path = pathlib.Path('./tests/user_data/lab/projects.csv') + write_csv(lab_project_content, lab_project_csv_path) + + yield lab_project_content, lab_project_csv_path + lab_project_csv_path.unlink() + + +@pytest.fixture +def lab_project_users_csv(): + """ Create a 'project_users.csv' file""" + lab_project_user_content = ["user,project", + "Sherlock,ProjA", + "Sherlock,ProjB", + "Watson,ProjB", + "Dr. Candace Pert,ProjA", + "User1,ProjA"] + lab_project_user_csv_path = pathlib.Path('./tests/user_data/lab/\ + project_users.csv') + write_csv(lab_project_user_content, lab_project_user_csv_path) + + yield lab_project_user_content, lab_project_user_csv_path + lab_project_user_csv_path.unlink() + + +@pytest.fixture +def lab_publications_csv(): + """ Create a 'publications.csv' file""" + lab_publication_content = ["project,publication", + "ProjA,arXiv:1807.11104", + "ProjA,arXiv:1807.11104v1"] + lab_publication_csv_path = pathlib.Path('./tests/user_data/lab/\ + publications.csv') + write_csv(lab_publication_content, lab_publication_csv_path) + + yield lab_publication_content, lab_publication_csv_path + lab_publication_csv_path.unlink() + + +@pytest.fixture +def lab_keywords_csv(): + """ Create a 'keywords.csv' file""" + lab_keyword_content = ["project,keyword", + "ProjA,Study", + "ProjA,Example", + "ProjB,Alternate"] + lab_keyword_csv_path = pathlib.Path('./tests/user_data/lab/keywords.csv') + write_csv(lab_keyword_content, lab_keyword_csv_path) + + yield lab_keyword_content, lab_keyword_csv_path + lab_keyword_csv_path.unlink() + + +@pytest.fixture +def lab_protocol_csv(): + """ Create a 'protocols.csv' file""" + lab_protocol_content = ["protocol,protocol_type,protocol_description", + "ProtA,IRB expedited review,Protocol for managing " + + "data ingestion", + "ProtB,Alternative Method,Limited protocol for " + + "piloting only"] + lab_protocol_csv_path = pathlib.Path('./tests/user_data/lab/protocols.csv') + 
write_csv(lab_protocol_content, lab_protocol_csv_path) + + yield lab_protocol_content, lab_protocol_csv_path + lab_protocol_csv_path.unlink() + + +@pytest.fixture +def lab_user_csv(): + """ Create a 'users.csv' file""" + lab_user_content = ["lab,user,user_role,user_email,user_cellphone", + "LabA,Sherlock,PI,Sherlock@BakerSt.com," + + "+44 20 7946 0344", + "LabA,Watson,Dr,DrWatson@BakerSt.com,+44 73 8389 1763", + "LabB,Dr. Candace Pert,PI,Pert@gmail.com," + + "+44 74 4046 5899", + "LabA,User1,Lab Tech,fake@email.com,+44 1632 960103"] + lab_user_csv_path = pathlib.Path('./tests/user_data/lab/users.csv') + write_csv(lab_user_content, lab_user_csv_path) + + yield lab_user_content, lab_user_csv_path + lab_user_csv_path.unlink() + + +@pytest.fixture +def ingest_lab(pipeline, lab_csv, lab_project_csv, lab_publications_csv, + lab_keywords_csv, lab_protocol_csv, lab_user_csv, + lab_project_users_csv): + """ From workflow_session ingest.py, import ingest_lab, run """ + from workflow_session.ingest import ingest_lab + _, lab_csv_path = lab_csv + _, lab_project_csv_path = lab_project_csv + _, lab_publication_csv_path = lab_publications_csv + _, lab_keyword_csv_path = lab_keywords_csv + _, lab_protocol_csv_path = lab_protocol_csv + _, lab_user_csv_path = lab_user_csv + _, lab_project_user_csv_path = lab_project_users_csv + ingest_lab(lab_csv_path=lab_csv_path, + project_csv_path=lab_project_csv_path, + publication_csv_path=lab_publication_csv_path, + keyword_csv_path=lab_keyword_csv_path, + protocol_csv_path=lab_protocol_csv_path, + users_csv_path=lab_user_csv_path, + project_user_csv_path=lab_project_user_csv_path, verbose=verbose) + return + + +# Subject data and ingestion +@pytest.fixture +def subjects_csv(): + """ Create a 'subjects.csv' file""" + subject_content = ["subject,sex,subject_birth_date,subject_description," + + "death_date,cull_method", + "subject5,F,2020-01-01 00:00:01,rich," + + "2020-10-02 00:00:01,natural causes", + "subject6,M,2020-01-01 00:00:01,manuel," + 
+ "2020-10-03 00:00:01,natural causes"] + subject_csv_path = pathlib.Path('./tests/user_data/subject/subjects.csv') + write_csv(subject_content, subject_csv_path) + + yield subject_content, subject_csv_path + subject_csv_path.unlink() + + +@pytest.fixture +def subjects_part_csv(): + """Create a 'subjects_part.csv for Subject part tables""" + subject_part_content = ["subject,protocol,user,line,strain,source,lab", + "subject6,ProtA,User1,line,strain,source,LabA", + "subject5,ProtA,User1,line,strain,source,LabA"] + subject_part_csv_path = pathlib.Path('./tests/user_data/subject/subjects_part.csv') + write_csv(subject_part_content, subject_part_csv_path) + + yield subject_part_content, subject_part_csv_path + subject_part_csv_path.unlink() + + +@pytest.fixture +def ingest_subjects(pipeline, ingest_lab, subjects_csv, subjects_part_csv): + """From workflow_session ingest.py, import ingest_subjects, run""" + from workflow_session.ingest import ingest_subjects + _, subject_csv_path = subjects_csv + _, subject_part_csv_path = subjects_part_csv + ingest_subjects(subject_csv_path=subject_csv_path, + subject_part_csv_path=subject_part_csv_path, verbose=verbose) + return + + +# Session data and ingestion +@pytest.fixture +def sessions_csv(): + """ Create a 'sessions.csv' file""" + session_csv_path = pathlib.Path('./tests/user_data/session/sessions.csv') + session_content = ["subject,project,session_datetime,session_dir,session_note", + "subject5,ProjA,2020-04-15 11:16:38,/subject5/session1," + + "'Successful data collection, no notes'", + "subject5,ProjA,2020-05-12 04:13:07,subject5\\session1," + + "'Data collection notes'", + "subject6,ProjA,2021-06-02 14:04:22,/subject6/session1," + + "'Ambient temp abnormally low'"] + write_csv(session_content, session_csv_path) + + yield session_content, session_csv_path + session_csv_path.unlink() + + +@pytest.fixture +def ingest_sessions(ingest_lab, ingest_subjects, sessions_csv): + """From workflow_session ingest.py, import 
ingest_sessions, run""" + from workflow_session.ingest import ingest_sessions + _, session_csv_path = sessions_csv + ingest_sessions(session_csv_path=session_csv_path, verbose=verbose) + return + + diff --git a/tests/test_ingest.py b/tests/test_ingest.py new file mode 100644 index 0000000..0c76ecc --- /dev/null +++ b/tests/test_ingest.py @@ -0,0 +1,91 @@ +'''Tests ingestion into schema tables: Lab, Subject, Session + 1. Assert length of populating data from __init__ + 2. Assert exact matches of inserted data for key tables +''' + +__all__ = ['dj_config', 'pipeline', 'lab_csv', 'lab_project_csv', 'lab_user_csv', + 'lab_publications_csv', 'lab_keywords_csv', 'lab_protocol_csv', + 'lab_project_users_csv', 'ingest_lab', 'subjects_csv', 'subjects_part_csv', + 'ingest_subjects', 'sessions_csv', 'ingest_sessions'] + +from . import (dj_config, pipeline, lab_csv, + lab_project_csv, lab_user_csv, lab_publications_csv, + lab_keywords_csv, lab_protocol_csv, + lab_project_users_csv, ingest_lab, + subjects_csv, subjects_part_csv, ingest_subjects, + sessions_csv, ingest_sessions) + + +def test_ingest_lab(pipeline, ingest_lab, + lab_csv, lab_project_csv, lab_protocol_csv): + """Check length of various lab schema tables""" + lab = pipeline['lab'] + assert len(lab.Lab()) == 2, f'Check Lab: len={len(lab.Lab())}' + assert len(lab.LabMembership()) == 4, \ + f'Check LabMembership: len={len(lab.LabMembership())}' + assert len(lab.User()) == 4, f'Check User: len={len(lab.User())}' + assert len(lab.UserRole()) == 3, f'Check UserRole: len={len(lab.UserRole())}' + assert len(lab.Location()) == 2, f'Check Location: len={len(lab.Location())}' + assert len(lab.Project()) == 2, f'Check Project: len={len(lab.Project())}' + assert len(lab.ProjectUser()) == 5, \ + f'Check ProjectUser: len={len(lab.ProjectUser())}' + assert len(lab.Protocol()) == 2, f'Check Protocol: len={len(lab.Protocol())}' + assert len(lab.ProtocolType()) == 2, \ + f'Check ProtocolType: len={len(lab.ProtocolType())}' + + labs, _ 
= lab_csv + for this_lab in labs[1:]: + lab_values = this_lab.split(",") + assert (lab.Lab & {'lab': lab_values[0]} + ).fetch1('lab_name') == lab_values[1] + + projects, _ = lab_project_csv + for this_project in projects[1:]: + project_values = this_project.split(",") + assert (lab.Project & {'project': project_values[0]} + ).fetch1('project_description') == project_values[1] + + protocols, _ = lab_protocol_csv + for this_protocol in protocols[1:]: + protocol_values = this_protocol.split(",") + assert (lab.Protocol & {'protocol': protocol_values[0]} + ).fetch1('protocol_type') == protocol_values[1] + + + +def test_ingest_subjects(pipeline, subjects_csv, subjects_part_csv, ingest_subjects): + """Check length of subject.Subject""" + subject = pipeline['subject'] + assert len(subject.Subject()) == 2, f'Check Subject: len={len(subject.Subject())}' + assert len(subject.Subject.Protocol()) == 2, \ + f'Check Subject.Protocol: len={len(subject.Subject.Protocol())}' + assert len(subject.Subject.User()) == 2, \ + f'Check Subject.User: len={len(subject.Subject.User())}' + + subjects, _ = subjects_csv + subjects_parts, _ = subjects_part_csv + for this_subject in subjects[1:]: + subject_values = this_subject.split(",") + assert (subject.Subject & {'subject': subject_values[0]} + ).fetch1('subject_description') == subject_values[3] + for this_subject in subjects_parts[1:]: + subject_values = this_subject.split(",") + assert (subject.Subject.Protocol & {'subject': subject_values[0]} + ).fetch1('protocol') == subject_values[1] + assert (subject.Subject.User & {'subject': subject_values[0]} + ).fetch1('user') == subject_values[2] + + + +def test_ingest_sessions(pipeline, sessions_csv, ingest_sessions): + """Check length/contents of Session.SessionDirectory""" + session = pipeline['session'] + assert len(session.Session()) == 3, f'Check Session: len={len(session.Session())}' + assert len(session.ProjectSession()) == 3, \ + f'Check ProjectSession: len={len(session.ProjectSession())}' 
+ + sessions, _ = sessions_csv + for sess in sessions[1:]: + sess = sess.split(",") + assert (session.SessionDirectory & {'subject': sess[0]} + & {'session_datetime': sess[2]}).fetch1('session_dir') == sess[3] diff --git a/tests/test_pipeline_generation.py b/tests/test_pipeline_generation.py new file mode 100644 index 0000000..323a3ea --- /dev/null +++ b/tests/test_pipeline_generation.py @@ -0,0 +1,43 @@ +'''Test pipeline construction + 1. Assert lab link to within-schema children + 2. Assert lab link to subject + 3. Assert subject link to session +''' + +__all__ = ['pipeline'] + +from . import pipeline + + +def test_generate_pipeline(pipeline): + session = pipeline['session'] + subject = pipeline['subject'] + lab = pipeline['lab'] + + # test connection Lab->schema children, and Lab->Subject.Lab + lab_membership, loc_tbl, subject_lab_tbl = \ + lab.Lab.children(as_objects=True) + assert lab_membership.full_table_name == lab.LabMembership.full_table_name + assert loc_tbl.full_table_name == lab.Location.full_table_name + assert subject_lab_tbl.full_table_name == \ + subject.Subject.Lab.full_table_name + + # test connection Subject -> schema children + session_tbl, _, subject_line_tbl, subject_protocol_tbl, subject_source_tbl, \ + subject_strain_tbl, subject_user_tbl, subject_cull_tbl, subject_death_tbl,\ + subject_zygotsity_tbl = subject.Subject.children(as_objects=True) + assert session_tbl.full_table_name == session.Session.full_table_name + assert subject_line_tbl.full_table_name == subject.Subject.Line.full_table_name + assert subject_protocol_tbl.full_table_name == \ + subject.Subject.Protocol.full_table_name + assert subject_source_tbl.full_table_name == subject.Subject.Source.full_table_name + assert subject_strain_tbl.full_table_name == subject.Subject.Strain.full_table_name + assert subject_user_tbl.full_table_name == subject.Subject.User.full_table_name + assert subject_cull_tbl.full_table_name == \ + subject.SubjectCullMethod.full_table_name + assert 
subject_death_tbl.full_table_name == subject.SubjectDeath.full_table_name + assert subject_zygotsity_tbl.full_table_name == subject.Zygosity.full_table_name + + # test connection Subject->Session + subject_tbl, *_ = session.Session.parents(as_objects=True) + assert subject_tbl.full_table_name == subject.Subject.full_table_name diff --git a/user_data/lab/keywords.csv b/user_data/lab/keywords.csv new file mode 100644 index 0000000..547039b --- /dev/null +++ b/user_data/lab/keywords.csv @@ -0,0 +1,4 @@ +project,keyword +ProjA,Study +ProjA,Example +ProjB,Alternate \ No newline at end of file diff --git a/user_data/lab/labs.csv b/user_data/lab/labs.csv new file mode 100644 index 0000000..33546db --- /dev/null +++ b/user_data/lab/labs.csv @@ -0,0 +1,3 @@ +lab,lab_name,institution,address,time_zone,location,location_description +LabA,The Example Lab,Example Uni,'221B Baker St,London NW1 6XE,UK',UTC+0,Example Building,'2nd floor lab dedicated to all fictional experiments.' +LabB,The Other Lab,Other Uni,'Oxford OX1 2JD, United Kingdom',UTC+0,Other Building,'fictional campus dedicated to imaginary experiments.' \ No newline at end of file diff --git a/user_data/lab/project_users.csv b/user_data/lab/project_users.csv new file mode 100644 index 0000000..fe21d31 --- /dev/null +++ b/user_data/lab/project_users.csv @@ -0,0 +1,6 @@ +user,project +Sherlock,ProjA +Sherlock,ProjB +Watson,ProjB +Dr. 
Candace Pert,ProjA +User1,ProjA diff --git a/user_data/lab/projects.csv b/user_data/lab/projects.csv new file mode 100644 index 0000000..1879c1c --- /dev/null +++ b/user_data/lab/projects.csv @@ -0,0 +1,3 @@ +project,project_description,repository_url,repository_name,codeurl +ProjA,Example project to populate element-lab,https://github.com/datajoint/element-lab/,element-lab,https://github.com/datajoint/element-lab/tree/main/element_lab +ProjB,Other example project to populate element-lab,https://github.com/datajoint/element-session/,element-session,https://github.com/datajoint/element-session/tree/main/element_session diff --git a/user_data/lab/protocols.csv b/user_data/lab/protocols.csv new file mode 100644 index 0000000..dc3fd9a --- /dev/null +++ b/user_data/lab/protocols.csv @@ -0,0 +1,3 @@ +protocol,protocol_type,protocol_description +ProtA,IRB expedited review,Protocol for managing data ingestion +ProtB,Alternative Method,Limited protocol for piloting only \ No newline at end of file diff --git a/user_data/lab/publications.csv b/user_data/lab/publications.csv new file mode 100644 index 0000000..420668e --- /dev/null +++ b/user_data/lab/publications.csv @@ -0,0 +1,3 @@ +project,publication +ProjA,arXiv:1807.11104 +ProjA,arXiv:1807.11104v1 \ No newline at end of file diff --git a/user_data/lab/sources.csv b/user_data/lab/sources.csv new file mode 100644 index 0000000..dc9eafb --- /dev/null +++ b/user_data/lab/sources.csv @@ -0,0 +1,2 @@ +source, source_name, contact_details, source_description +Provider1, Example Provider, +44 1632 960663 / Example@Provider.com, UK-based supplier of lab subjects mus musculus \ No newline at end of file diff --git a/user_data/lab/users.csv b/user_data/lab/users.csv new file mode 100644 index 0000000..6d6a8bd --- /dev/null +++ b/user_data/lab/users.csv @@ -0,0 +1,5 @@ +lab,user,user_role,user_email,user_cellphone +LabA,Sherlock,PI,Sherlock@BakerSt.com,+44 20 7946 0344 +LabA,Watson,Dr,DrWatson@BakerSt.com,+44 73 8389 1763 +LabB,Dr. 
Candace Pert,PI,Pert@gmail.com,+44 74 4046 5899 +LabA,User1,Lab Tech,fake@email.com,+44 1632 960103 diff --git a/user_data/session/sessions.csv b/user_data/session/sessions.csv new file mode 100644 index 0000000..1d83729 --- /dev/null +++ b/user_data/session/sessions.csv @@ -0,0 +1,4 @@ +subject,project,session_datetime,session_dir,session_note +subject5,ProjA,2020-04-15 11:16:38,/subject5/session1,'Successful data collection, no notes' +subject5,ProjA,2020-05-12 04:13:07,subject5\session1,'Data collection notes' +subject6,ProjA,2021-06-02 14:04:22,/subject6/session1,'Ambient temp abnormally low' diff --git a/user_data/subject/allele.csv b/user_data/subject/allele.csv new file mode 100644 index 0000000..ee4c9ea --- /dev/null +++ b/user_data/subject/allele.csv @@ -0,0 +1,2 @@ +allele, allele_standard_name,zygosity +Ex,Example,Absent \ No newline at end of file diff --git a/user_data/subject/line.csv b/user_data/subject/line.csv new file mode 100644 index 0000000..79b813d --- /dev/null +++ b/user_data/subject/line.csv @@ -0,0 +1,2 @@ +line, line_description, target_phenotype, is_active +C57BL/6J \ No newline at end of file diff --git a/user_data/subject/source.csv b/user_data/subject/source.csv new file mode 100644 index 0000000..5437003 --- /dev/null +++ b/user_data/subject/source.csv @@ -0,0 +1 @@ +allele, source_identifier, source_url, expression_data_url diff --git a/user_data/subject/strain.csv b/user_data/subject/strain.csv new file mode 100644 index 0000000..e4b7430 --- /dev/null +++ b/user_data/subject/strain.csv @@ -0,0 +1 @@ +strain, strain_standard_name, strain_desc diff --git a/user_data/subject/subjects.csv b/user_data/subject/subjects.csv new file mode 100644 index 0000000..c11552f --- /dev/null +++ b/user_data/subject/subjects.csv @@ -0,0 +1,6 @@ +subject,sex,subject_birth_date,subject_description,death_date,cull_method +subject5,F,2020-01-01 00:00:01,rich,2020-10-02 00:00:01,natural causes +subject6,M,2020-01-01 00:00:01,manuel,2020-10-03 
00:00:01,natural causes +subjectX,F,2020-01-01 00:00:01,manuel,2020-10-03 00:00:01,natural causes +subjectY,M,2020-01-01 00:00:01,manuel,2020-10-03 00:00:01,natural causes +subjectZ,M,2020-01-01 00:00:01,manuel,2020-10-03 00:00:01,natural causes diff --git a/user_data/subject/subjects_part.csv b/user_data/subject/subjects_part.csv new file mode 100644 index 0000000..ef99174 --- /dev/null +++ b/user_data/subject/subjects_part.csv @@ -0,0 +1,3 @@ +subject,protocol,user,line,strain,source,lab +subject6,ProtA,User1,line,strain,source,LabA +subject5,ProtA,User1,line,strain,source,LabA diff --git a/workflow_animal/__init__.py b/workflow_animal/__init__.py deleted file mode 100644 index ae798f2..0000000 --- a/workflow_animal/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -__author__ = "DataJoint NEURO" -__date__ = "March 18, 2021" -__version__ = "0.0.1" - -__all__ = ['__author__', '__version__', '__date__'] diff --git a/workflow_animal/pipeline.py b/workflow_animal/pipeline.py deleted file mode 100644 index d84f549..0000000 --- a/workflow_animal/pipeline.py +++ /dev/null @@ -1,23 +0,0 @@ -import datajoint as dj -from element_lab import lab -from element_animal import subject, genotyping -from element_lab.lab import Source, Lab, Protocol, User, Location - - -if 'custom' not in dj.config: - dj.config['custom'] = {} - -db_prefix = dj.config['custom'].get('database.prefix', '') - - -# ---------------------------------- Activate "lab" schema ------------------------------- - -lab.activate(db_prefix + 'lab') - - -# ------------------------- Activate "subject" and "genotyping" schema ------------------- - -subject.activate(db_prefix + 'subject', linking_module=__name__) - -# Omit this schema if genotying is not needed -genotyping.activate(db_prefix + 'genotyping', db_prefix + 'subject', linking_module=__name__) diff --git a/workflow_session/__init__.py b/workflow_session/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/workflow_session/export.py 
b/workflow_session/export.py new file mode 100644 index 0000000..5a92c2f --- /dev/null +++ b/workflow_session/export.py @@ -0,0 +1,3 @@ +import datajoint as dj + +from element_session.export.nwb import session_to_nwb diff --git a/workflow_session/ingest.py b/workflow_session/ingest.py new file mode 100644 index 0000000..3d0c0df --- /dev/null +++ b/workflow_session/ingest.py @@ -0,0 +1,109 @@ +import csv +from workflow_session.pipeline import lab, subject, session + + +def ingest_general(csvs, tables, + skip_duplicates=True, verbose=True): + """ + Inserts data from a series of csvs into their corresponding table: + e.g., ingest_general(['./lab_data.csv', './proj_data.csv'], + [lab.Lab(),lab.Project()] + ingest_general(csvs, tables, skip_duplicates=True) + :param csvs: list of relative paths to CSV files + :param tables: list of datajoint tables with () + :param verbose: print number inserted (i.e., table length change) + """ + for csv_filepath, table in zip(csvs, tables): + with open(csv_filepath, newline='') as f: + data = list(csv.DictReader(f, delimiter=',')) + if verbose: + prev_len = len(table) + table.insert(data, skip_duplicates=skip_duplicates, + # Ignore extra fields because some CSVs feed multiple tables + ignore_extra_fields=True) + if verbose: + insert_len = len(table) - prev_len # report length change + print(f'\n---- Inserting {insert_len} entry(s) ' + + f'into {table.table_name} ----') + + + +def ingest_lab(lab_csv_path='./user_data/lab/labs.csv', + project_csv_path='./user_data/lab/projects.csv', + publication_csv_path='./user_data/lab/publications.csv', + keyword_csv_path='./user_data/lab/keywords.csv', + protocol_csv_path='./user_data/lab/protocols.csv', + users_csv_path='./user_data/lab/users.csv', + project_user_csv_path='./user_data/lab/project_users.csv', + skip_duplicates=True, verbose=True): + """ + Inserts data from a CSVs into their corresponding lab schema tables. 
+ By default, uses data from workflow_session/user_data/lab/ + :param lab_csv_path: relative path of lab csv + :param project_csv_path: relative path of project csv + :param publication_csv_path: relative path of publication csv + :param keyword_csv_path: relative path of keyword csv + :param protocol_csv_path: relative path of protocol csv + :param users_csv_path: relative path of users csv + :param project_user_csv_path: relative path of project users csv + :param skip_duplicates=True: datajoint insert function param + :param verbose: print number inserted (i.e., table length change) + """ + + # List with repeats for when mult dj.tables fed by same CSV + csvs = [lab_csv_path, lab_csv_path, + project_csv_path, project_csv_path, + publication_csv_path, keyword_csv_path, + protocol_csv_path, protocol_csv_path, + users_csv_path, users_csv_path, users_csv_path, + project_user_csv_path] + tables = [lab.Lab(), lab.Location(), + lab.Project(), lab.ProjectSourceCode(), + lab.ProjectPublication(), lab.ProjectKeywords(), + lab.ProtocolType(), lab.Protocol(), + lab.UserRole(), lab.User(), lab.LabMembership(), + lab.ProjectUser()] + + ingest_general(csvs, tables, skip_duplicates=skip_duplicates, verbose=verbose) + + +def ingest_subjects(subject_csv_path='./user_data/subject/subjects.csv', + subject_part_csv_path='./user_data/subject/subjects_part.csv', + skip_duplicates=True, verbose=True): + """ + Inserts data from a subject csv into corresponding subject schema tables + By default, uses data from workflow_session/user_data/subject/ + :param subject_csv_path: relative path of csv for subject data + :param subject_part_csv_path: relative path of csv for subject part tables + :param skip_duplicates=True: datajoint insert function param + :param verbose: print number inserted (i.e., table length change) + """ + csvs = [subject_csv_path, subject_csv_path, subject_csv_path, + subject_part_csv_path, subject_part_csv_path, subject_part_csv_path] + tables = [subject.Subject(), 
subject.SubjectDeath(), subject.SubjectCullMethod(), + subject.Subject.Protocol(), subject.Subject.User(), subject.Subject.Lab()] + + ingest_general(csvs, tables, skip_duplicates=skip_duplicates, verbose=verbose) + + + +def ingest_sessions(session_csv_path='./user_data/session/sessions.csv', + skip_duplicates=True, verbose=True): + """ + Inserts data from a sessions csv into corresponding session schema tables + By default, uses data from workflow_session/user_data/session/ + :param session_csv_path: relative path of session csv + :param skip_duplicates=True: datajoint insert function param + :param verbose: print number inserted (i.e., table length change) + """ + csvs = [session_csv_path, session_csv_path, session_csv_path, session_csv_path] + tables = [session.Session(), session.SessionDirectory(), + session.SessionNote(), session.ProjectSession()] + + ingest_general(csvs, tables, skip_duplicates=skip_duplicates, verbose=verbose) + + +if __name__ == '__main__': + ingest_lab() + ingest_subjects() + ingest_sessions() diff --git a/workflow_session/paths.py b/workflow_session/paths.py new file mode 100644 index 0000000..53ea76b --- /dev/null +++ b/workflow_session/paths.py @@ -0,0 +1,7 @@ + + +def get_session_directory(session_key: dict) -> str: + from .pipeline import session + session_dir = (session.SessionDirectory + & session_key).fetch1('session_dir') + return session_dir diff --git a/workflow_session/pipeline.py b/workflow_session/pipeline.py new file mode 100644 index 0000000..dc56017 --- /dev/null +++ b/workflow_session/pipeline.py @@ -0,0 +1,34 @@ +import datajoint as dj + +from element_lab import lab +from element_animal import subject, genotyping +from element_session import session + +from element_animal.subject import Subject +from element_animal.genotyping import Sequence, BreedingPair, Cage,\ + SubjectCaging, GenotypeTest +from element_lab.lab import Source, Lab, Protocol, User, Project, ProjectUser, \ + ProjectKeywords, ProjectPublication, 
ProjectSourceCode +from element_session.session import Session, SessionDirectory, SessionExperimenter, \ + SessionNote, ProjectSession + +if 'custom' not in dj.config: + dj.config['custom'] = {} + +db_prefix = dj.config['custom'].get('database.prefix', '') + +__all__ = ['genotyping', 'session', 'Subject', 'Source', 'Lab', 'Protocol', 'User', + 'Project', 'ProjectKeywords', 'ProjectPublication', 'ProjectSourceCode', + 'ProjectUser', 'Session', 'SessionDirectory', 'SessionExperimenter', + 'SessionNote', 'ProjectSession'] + +# Activate "lab", "subject", "session", "genotyping" schemas ------------- + +lab.activate(db_prefix + 'lab') + +subject.activate(db_prefix + 'subject', linking_module=__name__) + +Experimenter = lab.User +session.activate(db_prefix + 'session', linking_module=__name__) + +genotyping.activate(db_prefix + 'genotyping', db_prefix + 'subject', linking_module=__name__) diff --git a/workflow_session/version.py b/workflow_session/version.py new file mode 100644 index 0000000..77ac34d --- /dev/null +++ b/workflow_session/version.py @@ -0,0 +1,2 @@ +"""Package metadata.""" +__version__ = '0.0.0b2'