From 6ba45daf7c00d6cbffd33aed91a984ad28419f56 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Tue, 24 Nov 2020 15:17:41 -0500 Subject: [PATCH 01/12] Basic updates for new API --- dandi/dandiapi.py | 97 ++++--------------- .../dandiarchive-docker/docker-compose.yml | 37 ++++--- 2 files changed, 41 insertions(+), 93 deletions(-) diff --git a/dandi/dandiapi.py b/dandi/dandiapi.py index beae9ffbe..6a10b4179 100644 --- a/dandi/dandiapi.py +++ b/dandi/dandiapi.py @@ -1,9 +1,7 @@ from contextlib import contextmanager - import requests from . import get_logger -from .utils import ensure_datetime from .consts import MAX_CHUNK_SIZE lgr = get_logger() @@ -147,10 +145,10 @@ def send_request( def get_url(self, path): # Construct the url - if self.api_url.endswith("/") and path.startswith("/"): - path = path[1:] - url = self.api_url + path - return url + if path.lower().startswith(("http://", "https://")): + return path + else: + return self.api_url.rstrip("/") + "/" + path.lstrip("/") def get(self, path, parameters=None, json_resp=True): """ @@ -229,89 +227,28 @@ def get_dandiset(self, dandiset_id, version): self.get(f"/dandisets/{dandiset_id}/versions/{version}/") ) - def get_dandiset_assets(self, dandiset_id, version, location=None, page_size=None): - """A generator to provide asset records - """ - if location is not None: - raise NotImplementedError( - "location specific query. See https://github.com/dandi/dandi-publish/issues/77" - ) - # although we could just provide ad-hoc implementation here for now. TODO - if page_size is not None: - raise NotImplementedError("paginated query is not supported yet") - page_size = 1000000 + def get_dandiset_assets(self, dandiset_id, version, page_size=None): + """ A generator to provide asset records """ resp = self.get( f"/dandisets/{dandiset_id}/versions/{version}/assets/", parameters={"page_size": page_size}, ) - try: - assert not resp.get( - "next" - ), "ATM we do not support pagination and result should have not been paginated" - assert not resp.get("prev") - results = resp.get("results", []) - assert len(results) == resp.get("count") - # Just some sanity checks for now, but might change, see - # https://github.com/dandi/dandi-publish/issues/79 - assert all( - r.get("version", {}).get("dandiset", {}).get("identifier") - == dandiset_id - for r in results - ) - assert all(r.get("version", {}).get("version") == version for r in results) - except AssertionError: - lgr.error( - f"Some expectations on returned /assets/ for {dandiset_id}@{version} are violated" - ) - raise - # Things might change, so let's just return only "relevant" ATM information - # under assumption that assets belong to the current version of the dataset requested - # results_ = [ - # {k: r[k] for k in ("path", "uuid", "size", "sha256", "metadata") if k in r} - # for r in results - # ] - for r in results: - # check for paranoid Yarik with current multitude of checksums - # r['sha256'] is what "dandi-publish" computed, but then - # metadata could contain multiple digests computed upon upload - metadata = r.get("metadata") - if ( - "sha256" in r - and "sha256" in metadata - and metadata["sha256"] != r["sha256"] - ): - lgr.warning("sha256 mismatch for %s" % str(r)) - # There is no "modified" time stamp and "updated" also shows something - # completely different, so if "modified" is not there -- we will try to - # get it from metadata - if "modified" not in r and metadata: - uploaded_mtime = metadata.get("uploaded_mtime") - if uploaded_mtime: - r["modified"] = ensure_datetime(uploaded_mtime) - yield r - - def get_dandiset_and_assets(self, dandiset_id, version, location=None): + while True: + yield from resp["results"] + if "next" in resp: + resp = self.get(resp["next"]) + else: + break + + def get_dandiset_and_assets(self, dandiset_id, version): """This is pretty much an adapter to provide "harmonized" output in both girder and DANDI api clients. - Harmonization should happen toward DADNDI API BUT AFAIK it is still influx + Harmonization should happen toward DANDI API BUT AFAIK it is still influx """ - # Fun begins! - location_ = "/" + location if location else "" - lgr.info(f"Traversing {dandiset_id}{location_} (version: {version})") - - # TODO: get all assets - # 1. includes sha256, created, updated but those are of "girder" level - # so lack "uploaded_mtime" and uploaded_nwb_object_id forbidding logic for - # deducing necessity to update/move. But we still might want to rely on its - # sha256 instead of metadata since older uploads would not have that metadata - # in them - # 2. there is no API to list assets given a location - # - # Get dandiset information + lgr.info(f"Traversing {dandiset_id} (version: {version})") dandiset = self.get_dandiset(dandiset_id, version) - # TODO: location - assets = self.get_dandiset_assets(dandiset_id, version, location=location) + assets = self.get_dandiset_assets(dandiset_id, version) return dandiset, assets def get_download_file_iter( diff --git a/dandi/tests/data/dandiarchive-docker/docker-compose.yml b/dandi/tests/data/dandiarchive-docker/docker-compose.yml index 0fa6b8ed2..844766976 100644 --- a/dandi/tests/data/dandiarchive-docker/docker-compose.yml +++ b/dandi/tests/data/dandiarchive-docker/docker-compose.yml @@ -1,7 +1,7 @@ # Based on # , -# , and -# , +# , and +# , # but using images uploaded to Docker Hub instead of building them locally. version: '2.1' @@ -50,8 +50,10 @@ services: #PUBLISH_API_URL: http://localhost:8000/api django: - image: dandiarchive/dandiarchive-publish + image: dandiarchive/dandiarchive-api command: ["./manage.py", "runserver", "0.0.0.0:8000"] + # Log printing via Rich is enhanced by a TTY + tty: true depends_on: girder: condition: service_started @@ -68,22 +70,25 @@ services: DJANGO_DANDI_GIRDER_API_URL: DJANGO_DANDI_GIRDER_API_KEY: DJANGO_DATABASE_URL: postgres://postgres:postgres@postgres:5432/django - DJANGO_MINIO_STORAGE_ACCESS_KEY: minioUserAccessKey + DJANGO_MINIO_STORAGE_ACCESS_KEY: minioAccessKey DJANGO_MINIO_STORAGE_ENDPOINT: minio:9000 - DJANGO_MINIO_STORAGE_SECRET_KEY: minioUserSecretKey + DJANGO_MINIO_STORAGE_SECRET_KEY: minioSecretKey DJANGO_STORAGE_BUCKET_NAME: django-storage DJANGO_MINIO_STORAGE_MEDIA_URL: http://localhost:9000/django-storage ports: - "8000:8000" celery: - image: dandiarchive/dandiarchive-publish + image: dandiarchive/dandiarchive-api command: [ - "celery", "worker", - "--app", "dandi.celery", - "--loglevel", "info", + "celery", + "--app", "dandiapi.celery", + "worker", + "--loglevel", "INFO", "--without-heartbeat" ] + # Docker Compose does not set the TTY width, which causes Celery errors + tty: false depends_on: minio: condition: service_healthy @@ -98,16 +103,22 @@ services: DJANGO_DANDI_GIRDER_API_URL: DJANGO_DANDI_GIRDER_API_KEY: DJANGO_DATABASE_URL: postgres://postgres:postgres@postgres:5432/django - DJANGO_MINIO_STORAGE_ACCESS_KEY: minioUserAccessKey + DJANGO_MINIO_STORAGE_ACCESS_KEY: minioAccessKey DJANGO_MINIO_STORAGE_ENDPOINT: minio:9000 - DJANGO_MINIO_STORAGE_SECRET_KEY: minioUserSecretKey + DJANGO_MINIO_STORAGE_SECRET_KEY: minioSecretKey DJANGO_STORAGE_BUCKET_NAME: django-storage DJANGO_MINIO_STORAGE_MEDIA_URL: http://localhost:9000/django-storage minio: - image: girder/minio-nonroot:latest + image: minio/minio:latest + # When run with a TTY, minio prints credentials on startup + tty: true + command: ["server", "/data"] ports: - "9000:9000" + environment: + MINIO_ACCESS_KEY: minioAccessKey + MINIO_SECRET_KEY: minioSecretKey healthcheck: test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] interval: 7s @@ -128,7 +139,7 @@ services: retries: 5 rabbitmq: - image: rabbitmq:latest + image: rabbitmq:management ports: - "5672:5672" From 6ced11b21751f1fa7d4e9cd680b7a72b221f2c74 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Wed, 25 Nov 2020 09:57:36 -0500 Subject: [PATCH 02/12] Set API URL for Docker Compose to point to API container --- dandi/consts.py | 4 ++-- dandi/tests/data/dandiarchive-docker/docker-compose.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dandi/consts.py b/dandi/consts.py index db9396e57..016539e84 100644 --- a/dandi/consts.py +++ b/dandi/consts.py @@ -79,7 +79,7 @@ # So it could be easily mapped to external IP (e.g. from within VM) # to test against instance running outside of current environment -instancehost = os.environ.get('DANDI_INSTANCEHOST', 'localhost') +instancehost = os.environ.get("DANDI_INSTANCEHOST", "localhost") known_instances = { "local-girder-only": dandi_instance( @@ -97,7 +97,7 @@ f"http://{instancehost}:8081", f"http://{instancehost}:8086", f"http://{instancehost}:8079", - None, # TODO: https://github.com/dandi/dandi-cli/issues/164 + f"http://{instancehost}:8000/api", ), "dandi": dandi_instance( "https://girder.dandiarchive.org", diff --git a/dandi/tests/data/dandiarchive-docker/docker-compose.yml b/dandi/tests/data/dandiarchive-docker/docker-compose.yml index 844766976..53834a10f 100644 --- a/dandi/tests/data/dandiarchive-docker/docker-compose.yml +++ b/dandi/tests/data/dandiarchive-docker/docker-compose.yml @@ -47,7 +47,7 @@ services: GIRDER_URL: http://localhost:8081 GUI_URL: http://localhost:8086 ABOUT_URL: http://www.dandiarchive.org - #PUBLISH_API_URL: http://localhost:8000/api + PUBLISH_API_URL: http://localhost:8000/api django: image: dandiarchive/dandiarchive-api From 237b1db78a4e2d07b1f492ba2d4e1cfbef3d5f40 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Mon, 30 Nov 2020 16:27:09 -0500 Subject: [PATCH 03/12] upload() method for new API --- dandi/dandiapi.py | 71 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/dandi/dandiapi.py b/dandi/dandiapi.py index 6a10b4179..d7d99c591 100644 --- a/dandi/dandiapi.py +++ b/dandi/dandiapi.py @@ -1,8 +1,11 @@ from contextlib import contextmanager +import os.path +from time import sleep import requests from . import get_logger from .consts import MAX_CHUNK_SIZE +from .support.digests import Digester lgr = get_logger() @@ -291,3 +294,71 @@ def _migrate_dandiset_metadata(cls, dandiset): if "identifier" not in dandiset_metadata and "dandiset" in dandiset_metadata: dandiset["metadata"] = dandiset_metadata.pop("dandiset") return dandiset + + def upload(self, dandiset_id, version_id, asset_path, asset_metadata, filepath): + filehash = Digester(["sha256"])(filepath)["sha256"] + lgr.debug("Calculated sha256 digest of %s for %s", filehash, filepath) + self.post("/uploads/validate/", json={"sha256": filehash}) + try: + self.get(f"/uploads/validations/{filehash}/") + except requests.HTTPError: + lgr.debug("Blob does not already exist on server") + blob_exists = False + else: + lgr.debug("Blob is already uploaded to server") + blob_exists = True + if not blob_exists: + lgr.debug("Beginning upload") + resp = self.post( + "/uploads/initialize/", + json={"file_name": asset_path, "file_size": os.path.getsize(filepath)}, + ) + object_key = resp["object_key"] + upload_id = resp["upload_id"] + parts_out = [] + with open(filepath, "rb") as fp: + for part in resp["parts"]: + chunk = fp.read(part["size"]) + if len(chunk) != part["size"]: + raise RuntimeError( + f"End of file {filepath} reached unexpectedly early" + ) + lgr.debug( + "Uploading part %d (%d bytes)", + part["part_number"], + part["size"], + ) + r = self.post(part["upload_url"], data=chunk, json_resp=False) + parts_out.append( + { + "part_number": part["part_number"], + "size": part["size"], + "etag": r.headers["ETag"], + } + ) + lgr.debug("Completing upload") + resp = self.post( + "/uploads/complete/", + json={ + "object_key": object_key, + "upload_id": upload_id, + "parts": parts_out, + }, + ) + self.post(resp["complete_url"]) + while True: + lgr.debug("Waiting for server-side validation to complete") + resp = self.get(f"/uploads/validations/{filehash}/") + if resp["state"] != "IN_PROGRESS": + if resp["state"] == "FAILED": + raise RuntimeError( + "Server-side asset validation failed!" + f" Error reported: {resp.get('error')}" + ) + break + sleep(0.1) + lgr.debug("Assigning asset blob to dandiset & version") + self.post( + f"/dandisets/{dandiset_id}/versions/{version_id}/assets/", + json={"path": asset_path, "metadata": asset_metadata, "sha256": filehash}, + ) From d8813cb182d3021bcff4cf845fe851edbc5383f4 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Tue, 1 Dec 2020 14:39:10 -0500 Subject: [PATCH 04/12] Test of new upload method --- dandi/dandiapi.py | 12 ++++++++++++ dandi/tests/fixtures.py | 8 +++++++- dandi/tests/test_dandiapi.py | 14 ++++++++++++++ 3 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 dandi/tests/test_dandiapi.py diff --git a/dandi/dandiapi.py b/dandi/dandiapi.py index d7d99c591..663a7b25d 100644 --- a/dandi/dandiapi.py +++ b/dandi/dandiapi.py @@ -19,6 +19,7 @@ class RESTFullAPIClient(object): def __init__(self, api_url): self.api_url = api_url self._session = None + self._headers = {} @contextmanager def session(self, session=None): @@ -46,6 +47,7 @@ def session(self, session=None): :param session: An existing :class:`requests.Session` object, or None. """ self._session = session if session else requests.Session() + self._session.headers.update(self._headers) try: yield self._session @@ -124,6 +126,7 @@ def send_request( if json_resp and "accept" not in _headers: _headers["accept"] = "application/json" + lgr.debug("%s %s", method.upper(), url) result = f( url, params=parameters, @@ -133,6 +136,7 @@ def send_request( headers=_headers, **kwargs, ) + lgr.debug("Response: %d", result.status_code) # If success, return the json object. Otherwise throw an exception. if not result.ok: @@ -208,6 +212,11 @@ def patch(self, path, parameters=None, data=None, json=None, json_resp=True): class DandiAPIClient(RESTFullAPIClient): + def __init__(self, api_url, token=None): + super().__init__(api_url) + if token is not None: + self._headers["Authorization"] = f"token {token}" + def get_asset(self, dandiset_id, version, uuid): """ @@ -362,3 +371,6 @@ def upload(self, dandiset_id, version_id, asset_path, asset_metadata, filepath): f"/dandisets/{dandiset_id}/versions/{version_id}/assets/", json={"path": asset_path, "metadata": asset_metadata, "sha256": filehash}, ) + + def create_dandiset(self, name, metadata): + return self.post("/dandisets/", json={"name": name, "metadata": metadata}) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index 95189a84d..c11e023f1 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -267,6 +267,7 @@ def local_docker_compose(): ], cwd=str(LOCAL_DOCKER_DIR), env=env, + universal_newlines=True, ).split()[2] run( @@ -291,7 +292,12 @@ def local_docker_compose(): headers={"Girder-Token": publish_api_key}, ).raise_for_status() - yield {"api_key": api_key, "instance": instance, "instance_id": instance_id} + yield { + "api_key": api_key, + "instance": instance, + "instance_id": instance_id, + "django_api_key": django_api_key, + } finally: run(["docker-compose", "down", "-v"], cwd=str(LOCAL_DOCKER_DIR), check=True) diff --git a/dandi/tests/test_dandiapi.py b/dandi/tests/test_dandiapi.py new file mode 100644 index 000000000..78fb7dbc2 --- /dev/null +++ b/dandi/tests/test_dandiapi.py @@ -0,0 +1,14 @@ +from ..dandiapi import DandiAPIClient + + +def test_upload(local_docker_compose, simple1_nwb): + client = DandiAPIClient( + api_url=local_docker_compose["instance"].api, + token=local_docker_compose["django_api_key"], + ) + with client.session(): + r = client.create_dandiset(name="Upload Test", metadata={}) + dandiset_id = r["identifier"] + client.upload(dandiset_id, "draft", "testing/simple1.nwb", {}, simple1_nwb) + asset, = client.get_dandiset_assets(dandiset_id, "draft") + assert asset["path"] == "testing/simple1.nwb" From 9f6e152e7975e18ad94e1c2eb2698af44263a35e Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Tue, 1 Dec 2020 15:19:10 -0500 Subject: [PATCH 05/12] Wait for Django API Docker container to be ready --- dandi/tests/fixtures.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index c11e023f1..99b826cb0 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -5,6 +5,7 @@ from subprocess import check_output, run import shutil import tempfile +from time import sleep from dateutil.tz import tzutc import pynwb @@ -292,6 +293,16 @@ def local_docker_compose(): headers={"Girder-Token": publish_api_key}, ).raise_for_status() + for _ in range(10): + try: + requests.get(f"{instance.api}/dandisets/") + except requests.ConnectionError: + sleep(1) + else: + break + else: + raise RuntimeError("Django container did not start up in time") + yield { "api_key": api_key, "instance": instance, From 3efebf2cf5a2c57c21f525f462a7400793e0489b Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Wed, 2 Dec 2020 11:52:15 -0500 Subject: [PATCH 06/12] Correct the calls to the validation endpoints --- dandi/dandiapi.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/dandi/dandiapi.py b/dandi/dandiapi.py index 663a7b25d..8119cee57 100644 --- a/dandi/dandiapi.py +++ b/dandi/dandiapi.py @@ -307,12 +307,14 @@ def _migrate_dandiset_metadata(cls, dandiset): def upload(self, dandiset_id, version_id, asset_path, asset_metadata, filepath): filehash = Digester(["sha256"])(filepath)["sha256"] lgr.debug("Calculated sha256 digest of %s for %s", filehash, filepath) - self.post("/uploads/validate/", json={"sha256": filehash}) try: - self.get(f"/uploads/validations/{filehash}/") - except requests.HTTPError: - lgr.debug("Blob does not already exist on server") - blob_exists = False + self.post("/uploads/validate/", json={"sha256": filehash}) + except requests.HTTPError as e: + if e.response.status_code == 400: + lgr.debug("Blob does not already exist on server") + blob_exists = False + else: + raise else: lgr.debug("Blob is already uploaded to server") blob_exists = True @@ -355,6 +357,7 @@ def upload(self, dandiset_id, version_id, asset_path, asset_metadata, filepath): }, ) self.post(resp["complete_url"]) + self.post("/uploads/validate/", json={"sha256": filehash}) while True: lgr.debug("Waiting for server-side validation to complete") resp = self.get(f"/uploads/validations/{filehash}/") From a31bce494021d88632b6fd49e52d9e12b725a135 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Wed, 2 Dec 2020 11:54:12 -0500 Subject: [PATCH 07/12] Include dandiset ID and version ID in `file_name` --- dandi/dandiapi.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dandi/dandiapi.py b/dandi/dandiapi.py index 8119cee57..05af1b118 100644 --- a/dandi/dandiapi.py +++ b/dandi/dandiapi.py @@ -322,7 +322,10 @@ def upload(self, dandiset_id, version_id, asset_path, asset_metadata, filepath): lgr.debug("Beginning upload") resp = self.post( "/uploads/initialize/", - json={"file_name": asset_path, "file_size": os.path.getsize(filepath)}, + json={ + "file_name": f"{dandiset_id}/{version_id}/{asset_path}", + "file_size": os.path.getsize(filepath), + }, ) object_key = resp["object_key"] upload_id = resp["upload_id"] From f6fe0d80bc3e69ca2ddb473cfb1cfd90ad4b004a Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Wed, 2 Dec 2020 14:58:47 -0500 Subject: [PATCH 08/12] Fix more API calls --- dandi/dandiapi.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/dandi/dandiapi.py b/dandi/dandiapi.py index 05af1b118..0d4c9d9a9 100644 --- a/dandi/dandiapi.py +++ b/dandi/dandiapi.py @@ -146,7 +146,10 @@ def send_request( ) if json_resp: - return result.json() + if result.text.strip(): + return result.json() + else: + return None else: return result @@ -342,7 +345,7 @@ def upload(self, dandiset_id, version_id, asset_path, asset_metadata, filepath): part["part_number"], part["size"], ) - r = self.post(part["upload_url"], data=chunk, json_resp=False) + r = self.put(part["upload_url"], data=chunk, json_resp=False) parts_out.append( { "part_number": part["part_number"], @@ -359,8 +362,11 @@ def upload(self, dandiset_id, version_id, asset_path, asset_metadata, filepath): "parts": parts_out, }, ) - self.post(resp["complete_url"]) - self.post("/uploads/validate/", json={"sha256": filehash}) + self.post(resp["complete_url"], data=resp["body"], json_resp=False) + self.post( + "/uploads/validate/", + json={"sha256": filehash, "object_key": object_key}, + ) while True: lgr.debug("Waiting for server-side validation to complete") resp = self.get(f"/uploads/validations/{filehash}/") From 533d4b1fd77eea25a2ca445eef14f54b73ba5738 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Mon, 14 Dec 2020 08:30:44 -0500 Subject: [PATCH 09/12] Update pagination --- dandi/dandiapi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/dandiapi.py b/dandi/dandiapi.py index 0d4c9d9a9..82cf9f996 100644 --- a/dandi/dandiapi.py +++ b/dandi/dandiapi.py @@ -250,7 +250,7 @@ def get_dandiset_assets(self, dandiset_id, version, page_size=None): ) while True: yield from resp["results"] - if "next" in resp: + if resp.get("next"): resp = self.get(resp["next"]) else: break From cc217fa369c74e4e27f7e237c2d4fe4cc2ac15f0 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Wed, 16 Dec 2020 09:36:56 -0500 Subject: [PATCH 10/12] Download method for new API --- dandi/dandiapi.py | 30 ++++++++++++++++++++++++++++++ dandi/tests/test_dandiapi.py | 7 ++++++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/dandi/dandiapi.py b/dandi/dandiapi.py index 82cf9f996..049072530 100644 --- a/dandi/dandiapi.py +++ b/dandi/dandiapi.py @@ -1,5 +1,6 @@ from contextlib import contextmanager import os.path +from pathlib import Path from time import sleep import requests @@ -386,3 +387,32 @@ def upload(self, dandiset_id, version_id, asset_path, asset_metadata, filepath): def create_dandiset(self, name, metadata): return self.post("/dandisets/", json={"name": name, "metadata": metadata}) + + def download( + self, + dandiset_id, + dirpath, + asset_path="", + version="draft", + chunk_size=MAX_CHUNK_SIZE, + ): + assets = [] + resp = self.get( + f"/dandisets/{dandiset_id}/versions/draft/assets/", + parameters={"path": asset_path}, + ) + while True: + assets.extend(resp["results"]) + if resp.get("next"): + resp = self.get(resp["next"]) + else: + break + for a in assets: + filepath = Path(dirpath, a["path"]) + filepath.parent.mkdir(parents=True, exist_ok=True) + downloader = self.get_download_file_iter( + dandiset_id, version, a["uuid"], chunk_size=chunk_size + ) + with filepath.open("wb") as fp: + for chunk in downloader(): + fp.write(chunk) diff --git a/dandi/tests/test_dandiapi.py b/dandi/tests/test_dandiapi.py index 78fb7dbc2..33e7b0ea2 100644 --- a/dandi/tests/test_dandiapi.py +++ b/dandi/tests/test_dandiapi.py @@ -1,7 +1,8 @@ +import os.path from ..dandiapi import DandiAPIClient -def test_upload(local_docker_compose, simple1_nwb): +def test_upload(local_docker_compose, simple1_nwb, tmp_path): client = DandiAPIClient( api_url=local_docker_compose["instance"].api, token=local_docker_compose["django_api_key"], @@ -12,3 +13,7 @@ def test_upload(local_docker_compose, simple1_nwb): client.upload(dandiset_id, "draft", "testing/simple1.nwb", {}, simple1_nwb) asset, = client.get_dandiset_assets(dandiset_id, "draft") assert asset["path"] == "testing/simple1.nwb" + client.download(dandiset_id, tmp_path) + p, = tmp_path.glob("*/*") + assert p == tmp_path / "testing" / "simple1.nwb" + assert p.stat().st_size == os.path.getsize(simple1_nwb) From 065b540d17c91ac2e1079904685cf53e5fbc5029 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Wed, 16 Dec 2020 09:57:28 -0500 Subject: [PATCH 11/12] Simplify code --- dandi/dandiapi.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/dandi/dandiapi.py b/dandi/dandiapi.py index 049072530..1ae94fbe5 100644 --- a/dandi/dandiapi.py +++ b/dandi/dandiapi.py @@ -243,11 +243,11 @@ def get_dandiset(self, dandiset_id, version): self.get(f"/dandisets/{dandiset_id}/versions/{version}/") ) - def get_dandiset_assets(self, dandiset_id, version, page_size=None): + def get_dandiset_assets(self, dandiset_id, version, page_size=None, path=None): """ A generator to provide asset records """ resp = self.get( f"/dandisets/{dandiset_id}/versions/{version}/assets/", - parameters={"page_size": page_size}, + parameters={"page_size": page_size, "path": None}, ) while True: yield from resp["results"] @@ -396,17 +396,7 @@ def download( version="draft", chunk_size=MAX_CHUNK_SIZE, ): - assets = [] - resp = self.get( - f"/dandisets/{dandiset_id}/versions/draft/assets/", - parameters={"path": asset_path}, - ) - while True: - assets.extend(resp["results"]) - if resp.get("next"): - resp = self.get(resp["next"]) - else: - break + assets = list(self.get_dandiset_assets(dandiset_id, version, path=asset_path)) for a in assets: filepath = Path(dirpath, a["path"]) filepath.parent.mkdir(parents=True, exist_ok=True) From 2ef1d0012f59a7d5f3a93ee4850c6c1dd7caf928 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Wed, 16 Dec 2020 11:41:59 -0500 Subject: [PATCH 12/12] Split up download methods --- dandi/dandiapi.py | 51 ++++++++++++++++++++++++++---------- dandi/tests/test_dandiapi.py | 4 +-- 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/dandi/dandiapi.py b/dandi/dandiapi.py index 1ae94fbe5..be2f658d3 100644 --- a/dandi/dandiapi.py +++ b/dandi/dandiapi.py @@ -388,21 +388,44 @@ def upload(self, dandiset_id, version_id, asset_path, asset_metadata, filepath): def create_dandiset(self, name, metadata): return self.post("/dandisets/", json={"name": name, "metadata": metadata}) - def download( - self, - dandiset_id, - dirpath, - asset_path="", - version="draft", - chunk_size=MAX_CHUNK_SIZE, + def download_asset( + self, dandiset_id, version, asset_uuid, filepath, chunk_size=MAX_CHUNK_SIZE + ): + downloader = self.get_download_file_iter( + dandiset_id, version, asset_uuid, chunk_size=chunk_size + ) + with open(filepath, "wb") as fp: + for chunk in downloader(): + fp.write(chunk) + + def download_asset_bypath( + self, dandiset_id, version, asset_path, filepath, chunk_size=MAX_CHUNK_SIZE + ): + try: + # Weed out any assets that happen to have the given path as a + # proper prefix: + (asset,) = ( + a + for a in self.get_dandiset_assets(dandiset_id, version, path=asset_path) + if a["path"] == asset_path + ) + except ValueError: + raise RuntimeError(f"No asset found with path {asset_path!r}") + self.download_asset( + dandiset_id, version, asset["uuid"], filepath, chunk_size=chunk_size + ) + + def download_assets_directory( + self, dandiset_id, version, assets_dirpath, dirpath, chunk_size=MAX_CHUNK_SIZE ): - assets = list(self.get_dandiset_assets(dandiset_id, version, path=asset_path)) + if assets_dirpath and not assets_dirpath.endswith("/"): + assets_dirpath += "/" + assets = list( + self.get_dandiset_assets(dandiset_id, version, path=assets_dirpath) + ) for a in assets: - filepath = Path(dirpath, a["path"]) + filepath = Path(dirpath, a["path"][len(assets_dirpath) :]) filepath.parent.mkdir(parents=True, exist_ok=True) - downloader = self.get_download_file_iter( - dandiset_id, version, a["uuid"], chunk_size=chunk_size + self.download_asset( + dandiset_id, version, a["uuid"], filepath, chunk_size=chunk_size ) - with filepath.open("wb") as fp: - for chunk in downloader(): - fp.write(chunk) diff --git a/dandi/tests/test_dandiapi.py b/dandi/tests/test_dandiapi.py index 33e7b0ea2..e36758456 100644 --- a/dandi/tests/test_dandiapi.py +++ b/dandi/tests/test_dandiapi.py @@ -13,7 +13,7 @@ def test_upload(local_docker_compose, simple1_nwb, tmp_path): client.upload(dandiset_id, "draft", "testing/simple1.nwb", {}, simple1_nwb) asset, = client.get_dandiset_assets(dandiset_id, "draft") assert asset["path"] == "testing/simple1.nwb" - client.download(dandiset_id, tmp_path) - p, = tmp_path.glob("*/*") + client.download_assets_directory(dandiset_id, "draft", "", tmp_path) + p, = [p for p in tmp_path.glob("**/*") if p.is_file()] assert p == tmp_path / "testing" / "simple1.nwb" assert p.stat().st_size == os.path.getsize(simple1_nwb)