Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DEVELOPMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ development command line options.
All those options are otherwise hidden from the user-visible (`--help`)
interface, unless this env variable is set to a non-empty value

- `DANDI_API_KEY` -- avoids using keyrings, thus making it possible to
- `DANDI_GIRDER_API_KEY` -- avoids using keyrings, thus making it possible to
"temporarily" use another account etc.

- `DANDI_LOG_LEVEL` -- set log level. By default `INFO`, should be an int (`10` - `DEBUG`).
Expand Down
14 changes: 7 additions & 7 deletions dandi/cli/cmd_download.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os

import click
from .base import devel_option, map_to_click_exceptions
from .base import map_to_click_exceptions


class ChoiceList(click.ParamType):
Expand Down Expand Up @@ -78,12 +78,12 @@ def get_metavar(self, param):
# type=click.Choice(["require", "skip", "ignore"]),
# default="require",
# )
@devel_option(
"--develop-debug",
help="For development: do not use pyout callbacks, do not swallow exception",
default=False,
is_flag=True,
)
# @devel_option(
# "--develop-debug",
# help="For development: do not use pyout callbacks, do not swallow exception",
# default=False,
# is_flag=True,
# )
@click.argument("url", nargs=-1)
@map_to_click_exceptions
def download(url, output_dir, existing, jobs, format, download_types):
Expand Down
23 changes: 19 additions & 4 deletions dandi/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,35 +75,50 @@
dandiset_metadata_file = "dandiset.yaml"
dandiset_identifier_regex = "^[0-9]{6}$"

dandi_instance = namedtuple("dandi_instance", ("girder", "gui", "redirector", "api"))
dandi_instance = namedtuple(
"dandi_instance", ("metadata_version", "girder", "gui", "redirector", "api")
)

# So it could be easily mapped to external IP (e.g. from within VM)
# to test against instance running outside of current environment
instancehost = os.environ.get("DANDI_INSTANCEHOST", "localhost")

known_instances = {
"local-girder-only": dandi_instance(
f"http://{instancehost}:8080", None, None, None
0, f"http://{instancehost}:8080", None, None, None
), # just pure girder
# Redirector: TODO https://github.com/dandi/dandiarchive/issues/139
"local-docker": dandi_instance(
0,
f"http://{instancehost}:8080",
f"http://{instancehost}:8085",
None,
f"http://{instancehost}:9000", # ATM it is minio, not sure where /api etc
# may be https://github.com/dandi/dandi-publish/pull/71 would help
),
"local-docker-tests": dandi_instance(
0,
f"http://{instancehost}:8081",
f"http://{instancehost}:8086",
f"http://{instancehost}:8079",
f"http://{instancehost}:8000/api",
None,
),
"dandi": dandi_instance(
0,
"https://girder.dandiarchive.org",
"https://gui.dandiarchive.org",
"https://dandiarchive.org",
"https://publish.dandiarchive.org/api", # ? might become api.
None, # publish. is gone, superseded by the API, which has not yet fully superseded the rest
),
"dandi-api": dandi_instance(
1,
None,
"https://gui-beta-dandiarchive-org.netlify.app",
None,
"https://api.dandiarchive.org/api",
),
"dandi-api-local-docker-tests": dandi_instance(
1, None, None, None, f"http://{instancehost}:8000/api"
),
}
# to map back url: name
Expand Down
162 changes: 126 additions & 36 deletions dandi/dandiapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
import requests

from . import get_logger
from .consts import MAX_CHUNK_SIZE
from .consts import MAX_CHUNK_SIZE, known_instances_rev
from .girder import keyring_lookup
from .support.digests import Digester

lgr = get_logger()
Expand Down Expand Up @@ -219,7 +220,29 @@ class DandiAPIClient(RESTFullAPIClient):
def __init__(self, api_url, token=None):
super().__init__(api_url)
if token is not None:
self._headers["Authorization"] = f"token {token}"
self.authenticate(token)

def authenticate(self, token):
    """
    Install `token` as the credential sent with subsequent requests.

    Parameters
    ----------
    token: str
        the API token; stored in the ``Authorization`` header in the form
        ``token <value>``
    """
    header_value = f"token {token}"
    self._headers["Authorization"] = header_value

def dandi_authenticate(self):
    """
    Authenticate this client, obtaining the API key from the first source
    that provides one.

    Sources are tried in this order:

    1. the ``DANDI_API_KEY`` environment variable (if set and non-empty);
    2. the keyring entry for this instance, looked up via `keyring_lookup`;
    3. an interactive prompt, whose answer is then saved to the keyring.

    Raises
    ------
    NotImplementedError
        if ``self.api_url`` is not one of the URLs in `known_instances_rev`,
        so no keyring entry name can be derived for it
    """
    # Shortcut for advanced folks
    env_key = os.environ.get("DANDI_API_KEY", None)
    if env_key:
        self.authenticate(env_key)
        lgr.debug("Successfully authenticated using the key from the envvar")
        return

    # The keyring entry is named after the known instance this URL maps to
    if self.api_url not in known_instances_rev:
        raise NotImplementedError("TODO client name derivation for keyring")
    client_name = known_instances_rev[self.api_url]
    app_id = f"dandi-api-{client_name}"

    keyring_backend, api_key = keyring_lookup(app_id, "key")
    if not api_key:
        # Nothing stored yet: ask the user and remember the answer
        api_key = input(f"Please provide API Key for {client_name}: ")
        keyring_backend.set_password(app_id, "key", api_key)
        lgr.debug("Stored key in keyring")
    self.authenticate(api_key)

def get_asset(self, dandiset_id, version, uuid):
"""
Expand Down Expand Up @@ -309,6 +332,49 @@ def _migrate_dandiset_metadata(cls, dandiset):
return dandiset

def upload(self, dandiset_id, version_id, asset_path, asset_metadata, filepath):
    """
    Upload a local file as an asset, blocking until the upload finishes.

    This drives `iter_upload()` to exhaustion, discarding its progress
    reports and pausing briefly each time a "validating" status is
    reported.

    Parameters
    ----------
    dandiset_id: str
        the ID of the Dandiset to which to upload the file
    version_id: str
        the ID of the version of the Dandiset to which to upload the file
    asset_path: str
        the POSIX path at which the uploaded file will be placed on the
        server
    asset_metadata: dict
        metadata for the uploaded asset file
    filepath: str or PathLike
        the path to the local file to upload
    """
    progress = self.iter_upload(
        dandiset_id, version_id, asset_path, asset_metadata, filepath
    )
    for update in progress:
        if update["status"] != "validating":
            continue
        # Give the server a moment before the next status is requested
        sleep(0.1)

def iter_upload(
self, dandiset_id, version_id, asset_path, asset_metadata, filepath
):
"""
Parameters
----------
dandiset_id: str
the ID of the Dandiset to which to upload the file
version_id: str
the ID of the version of the Dandiset to which to upload the file
asset_path: str
the POSIX path at which the uploaded file will be placed on the
server
asset_metadata: dict
metadata for the uploaded asset file
filepath: str or PathLike
the path to the local file to upload

Returns
-------
a generator of `dict`s containing at least a ``"status"`` key
"""
filehash = Digester(["sha256"])(filepath)["sha256"]
lgr.debug("Calculated sha256 digest of %s for %s", filehash, filepath)
try:
Expand All @@ -323,51 +389,61 @@ def upload(self, dandiset_id, version_id, asset_path, asset_metadata, filepath):
lgr.debug("Blob is already uploaded to server")
blob_exists = True
if not blob_exists:
total_size = os.path.getsize(filepath)
lgr.debug("Beginning upload")
resp = self.post(
"/uploads/initialize/",
json={
"file_name": f"{dandiset_id}/{version_id}/{asset_path}",
"file_size": os.path.getsize(filepath),
"file_size": total_size,
},
)
object_key = resp["object_key"]
upload_id = resp["upload_id"]
parts_out = []
with open(filepath, "rb") as fp:
for part in resp["parts"]:
chunk = fp.read(part["size"])
if len(chunk) != part["size"]:
raise RuntimeError(
f"End of file {filepath} reached unexpectedly early"
bytes_uploaded = 0
storage = RESTFullAPIClient("http://nil.nil")
with storage.session():
with open(filepath, "rb") as fp:
for part in resp["parts"]:
chunk = fp.read(part["size"])
if len(chunk) != part["size"]:
raise RuntimeError(
f"End of file {filepath} reached unexpectedly early"
)
lgr.debug(
"Uploading part %d (%d bytes)",
part["part_number"],
part["size"],
)
lgr.debug(
"Uploading part %d (%d bytes)",
part["part_number"],
part["size"],
)
r = self.put(part["upload_url"], data=chunk, json_resp=False)
parts_out.append(
{
"part_number": part["part_number"],
"size": part["size"],
"etag": r.headers["ETag"],
r = storage.put(part["upload_url"], data=chunk, json_resp=False)
bytes_uploaded += len(chunk)
yield {
"status": "uploading",
"upload": 100 * bytes_uploaded / total_size,
"current": bytes_uploaded,
}
)
lgr.debug("Completing upload")
resp = self.post(
"/uploads/complete/",
json={
"object_key": object_key,
"upload_id": upload_id,
"parts": parts_out,
},
)
self.post(resp["complete_url"], data=resp["body"], json_resp=False)
self.post(
"/uploads/validate/",
json={"sha256": filehash, "object_key": object_key},
)
parts_out.append(
{
"part_number": part["part_number"],
"size": part["size"],
"etag": r.headers["ETag"],
}
)
lgr.debug("Completing upload")
resp = self.post(
"/uploads/complete/",
json={
"object_key": object_key,
"upload_id": upload_id,
"parts": parts_out,
},
)
storage.post(resp["complete_url"], data=resp["body"], json_resp=False)
self.post(
"/uploads/validate/",
json={"sha256": filehash, "object_key": object_key},
)
while True:
lgr.debug("Waiting for server-side validation to complete")
resp = self.get(f"/uploads/validations/{filehash}/")
Expand All @@ -378,12 +454,14 @@ def upload(self, dandiset_id, version_id, asset_path, asset_metadata, filepath):
f" Error reported: {resp.get('error')}"
)
break
sleep(0.1)
yield {"status": "validating"}
lgr.debug("Assigning asset blob to dandiset & version")
yield {"status": "producing asset"}
self.post(
f"/dandisets/{dandiset_id}/versions/{version_id}/assets/",
json={"path": asset_path, "metadata": asset_metadata, "sha256": filehash},
)
yield {"status": "done"}

def create_dandiset(self, name, metadata):
return self.post("/dandisets/", json={"name": name, "metadata": metadata})
Expand Down Expand Up @@ -429,3 +507,15 @@ def download_assets_directory(
self.download_asset(
dandiset_id, version, a["uuid"], filepath, chunk_size=chunk_size
)

def get_asset_bypath(self, dandiset_id, version, asset_path):
    """
    Look up the asset located at exactly `asset_path`.

    Parameters
    ----------
    dandiset_id: str
        the ID of the Dandiset to search
    version: str
        the version of the Dandiset to search
    asset_path: str
        the full path of the asset within the Dandiset

    Returns
    -------
    The single asset record (as yielded by `get_dandiset_assets()`) whose
    ``"path"`` equals `asset_path`, or `None` if there is no such asset or
    more than one.
    """
    try:
        # Weed out any assets that happen to have the given path as a
        # proper prefix:
        (asset,) = (
            a
            for a in self.get_dandiset_assets(dandiset_id, version, path=asset_path)
            if a["path"] == asset_path
        )
    except ValueError:
        # Unpacking found zero or several exact matches.  NOTE(review): a
        # ValueError raised inside get_dandiset_assets() is swallowed here too.
        return None
    # Previously the match was dropped and None was returned implicitly
    return asset
Loading