diff --git a/README.md b/README.md index 7d70b1c..efbeb62 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ ## A basic API client implementation for [api.varsome.com](https://api.varsome.com) -This tool contains examples for the Varsome API usage. It can be used against either the production server ([api.varsome.com](https://api.varsome.com)) or the staging server ( ([staging-api.varsome.com](https://staging-api.varsome.com))) +This tool contains examples for the Varsome API usage. It can be used against the production server ([api.varsome.com](https://api.varsome.com)), the staging server ([staging-api.varsome.com](https://staging-api.varsome.com)) or the stable api server ([stable-api.varsome.com](https://stable-api.varsome.com)) ### Staging-api environment @@ -69,6 +69,13 @@ Notice, however, that not all available annotations will be present in the `anno of the returned annotations will be available when running this script. See the "Using the client in your code" section below for how to annotate a VCF file with the annotations that are of interest to you. +*Warning*: varsome_api_annotate_vcf.py can only deal with: + +- SNPs +- small indels (up to 200bp) + +If you want to use this script please remove any variant from your VCF that does not meet the above criteria. + ### Using the client in your code Using the API client is quite straightforward. Just install the API client package and use the following in your code: @@ -77,7 +84,7 @@ Using the API client is quite straightforward. Just install the API client packa from varsome_api.client import VarSomeAPIClient # API key is not required for single variant lookups api_key = 'Your token' -api = VarSomeAPIClient(api_key) +api = VarSomeAPIClient(api_key, api_url="https://stable-api.varsome.com") # fetch information about a variant into a dictionary result = api.lookup('chr7-140453136-A-T', params={'add-source-databases': 'gnomad-exomes,refseq-transcripts'}, ref_genome='hg19') # access results e.g. the transcripts of the variant @@ -105,7 +112,7 @@ except VarSomeAPIException as e: To view available request parameters (used by the `params` method parameter), refer to an example at [api.varsome.com](https://api.varsome.com). -To understand how annotation properties are included in the JSON response, please refer to the relevant [schema](https://api.varsome.com/lookup/schema). +To understand how annotation properties are included in the JSON response, please refer to the relevant [schema](https://api.varsome.com/docs/variants/). #### JSON response wrapper @@ -171,7 +178,7 @@ To annotate the VCF file with the annotations that you are interested in, you ne ```python from varsome_api.vcf import VCFAnnotator -from vcf.parser import _Info +from vcf.parser import _Info, _encode_type class MyVCFAnnotator(VCFAnnotator): def annotate_record(self, record, variant_result): @@ -192,8 +199,9 @@ class MyVCFAnnotator(VCFAnnotator): :param vcf_template: vcf reader object :return: """ - vcf_template.infos['gnomad_exomes_AN'] = _Info('gnomad_exomes_AN', '.', 'Integer', - 'GnomAD exomes allele number value', None, None) + vcf_template.infos['gnomad_exomes_AN'] = _Info('gnomad_exomes_AN', 1, 'Integer', + 'GnomAD exomes allele number value', + None, None, _encode_type("Integer"),) # if you wish to also include the default headers # super().add_vcf_header_info(vcf_template) @@ -217,9 +225,9 @@ To obtain an API key please [contact us](mailto:support@saphetor.com). Clone the repository, after creating a virtual environment, and run: - python setup.py test + pip install tox + tox -Running the tests will install [nose](https://nose.readthedocs.io/en/latest/) in your virtual environment. To run the tests, set the `VARSOME_API_KEY` environment variable to your API token. Otherwise, tests will fail because the API will return a 401 (not authenticated) error. Be advised as well that running the tests will count towards your account request limit depending on the diff --git a/scripts/varsome_api_annotate_vcf.py b/scripts/varsome_api_annotate_vcf.py index 4cf5b6e..8a5fd09 100755 --- a/scripts/varsome_api_annotate_vcf.py +++ b/scripts/varsome_api_annotate_vcf.py @@ -15,42 +15,78 @@ # limitations under the License. import argparse -import sys from varsome_api.vcf import VCFAnnotator -__author__ = 'ckopanos' - -def annotate_vcf(argv): - parser = argparse.ArgumentParser(description='VCF Annotator command line') - parser.add_argument('-k', help='Your key to the API', type=str, metavar='API Key', required=True) - parser.add_argument('-g', help='Reference genome either hg19 or hg38', type=str, metavar='Reference Genome', - required=False, default='hg19') - parser.add_argument('-i', - help='Path to vcf file', - type=str, metavar='Input VCF File', required=True) - parser.add_argument('-o', - help='Path to output vcf file', - type=str, metavar='Output VCF File', required=False) - parser.add_argument('-p', - help='Request parameters e.g. add-all-data=1 expand-pubmed-articles=0', - type=str, metavar='Request Params', required=False, nargs='+') - parser.add_argument('-t', help='Run vcf annotator using x threads', type=int, default=3, required=False, - metavar='Number of threads') +def annotate_vcf(): + parser = argparse.ArgumentParser(description="VCF Annotator command line") + parser.add_argument( + "-k", help="Your key to the API", type=str, metavar="API Key", required=True + ) + parser.add_argument( + "-g", + help="Reference genome either hg19 or hg38", + type=str, + metavar="Reference Genome", + required=False, + default="hg19", + ) + parser.add_argument( + "-i", help="Path to vcf file", type=str, metavar="Input VCF File", required=True + ) + parser.add_argument( + "-o", + help="Path to output vcf file", + type=str, + metavar="Output VCF File", + required=False, + ) + parser.add_argument( + "-p", + help="Request parameters e.g. add-all-data=1 add-ACMG-annotation=1", + type=str, + metavar="Request Params", + required=False, + nargs="+", + ) + parser.add_argument( + "-t", + help="Run vcf annotator using x threads", + type=int, + default=3, + required=False, + metavar="Number of threads", + ) + parser.add_argument( + "-u", + help="Use specific VarSome API host url " + "(e.g. https://api.varsome.com or https://stable-api.varsome.com", + type=str, + required=False, + metavar="VarSome API host url", + ) args = parser.parse_args() api_key = args.k vcf_file = args.i output_vcf_file = args.o ref_genome = args.g num_threads = args.t + api_url = args.u request_parameters = None if args.p: - request_parameters = {param[0]: param[1] for param in [param.split("=") for param in args.p]} - vcf_annotator = VCFAnnotator(api_key=api_key, ref_genome=ref_genome, get_parameters=request_parameters, - max_threads=num_threads) + request_parameters = { + param[0]: param[1] for param in [param.split("=") for param in args.p] + } + vcf_annotator = VCFAnnotator( + api_key=api_key, + api_url=api_url, + ref_genome=ref_genome, + get_parameters=request_parameters, + max_threads=num_threads, + ) vcf_annotator.annotate(vcf_file, output_vcf_file) if __name__ == "__main__": - annotate_vcf(sys.argv[1:]) + annotate_vcf() diff --git a/scripts/varsome_api_run.py b/scripts/varsome_api_run.py index e8bc0da..eef0abf 100755 --- a/scripts/varsome_api_run.py +++ b/scripts/varsome_api_run.py @@ -16,112 +16,175 @@ import argparse import json -import sys import os +import sys from varsome_api.client import VarSomeAPIClient -__author__ = 'ckopanos' - -def annotate_variant(argv): - parser = argparse.ArgumentParser(description='Sample VarSome API calls') - parser.add_argument('-k', help='Your key to the API', type=str, metavar='API Key', required=False) - parser.add_argument('-g', help='Reference genome either hg19 or hg38', type=str, metavar='Reference Genome', - required=False, default='hg19') - parser.add_argument('-q', - help='Query to lookup in the API e.g. chr19:20082943:1:G or in case of batch request ' - 'e.g. chr15-73027478-T-C rs113488022. Don\'t use it together with the -i option', - type=str, metavar='Query', required=False, nargs='+') - parser.add_argument('-p', - help='Request parameters e.g. add-all-data=1 expand-pubmed-articles=0', - type=str, metavar='Request Params', required=False, nargs='+') - parser.add_argument('-i', - help='Path to text file with variants. It should include one variant per line. Don\'t use it ' - 'together with the -q option', - type=str, metavar='Text/CSV File one line per variant', required=False) - parser.add_argument('-o', - help='Path to output file to store variant annotations', - type=str, metavar='Output File with json entries', required=False) +def annotate_variant(): + parser = argparse.ArgumentParser(description="Sample VarSome API calls") + parser.add_argument( + "-k", help="Your key to the API", type=str, metavar="API Key", required=False + ) + parser.add_argument( + "-g", + help="Reference genome either hg19 or hg38", + type=str, + metavar="Reference Genome", + required=False, + default="hg19", + ) + parser.add_argument( + "-q", + help="Query to lookup in the API e.g. " + "chr19:20082943:1:G or in case of batch request " + "e.g. chr15-73027478-T-C rs113488022. " + "Don't use it together with the -i option", + type=str, + metavar="Query", + required=False, + nargs="+", + ) + parser.add_argument( + "-p", + help="Request parameters e.g. add-all-data=1 expand-pubmed-articles=0", + type=str, + metavar="Request Params", + required=False, + nargs="+", + ) + parser.add_argument( + "-i", + help="Path to text file with variants. " + "It should include one variant per line. Don't use it " + "together with the -q option", + type=str, + metavar="Text/CSV File one line per variant", + required=False, + ) + parser.add_argument( + "-o", + help="Path to output file to store variant annotations", + type=str, + metavar="Output File with json entries", + required=False, + ) + parser.add_argument( + "-u", + help="Use specific VarSome API host url " + "(e.g. https://api.varsome.com or https://stable-api.varsome.com", + type=str, + required=False, + metavar="VarSome API host url", + ) args = parser.parse_args() api_key = args.k query = args.q ref_genome = args.g input_file = args.i output_file = args.o + api_url = args.u if query and input_file: - sys.stderr.write("Don't specify -i and -q options together. Use only one of them\n") + sys.stderr.write( + "Don't specify -i and -q options together. Use only one of them\n" + ) sys.exit(1) if not query and not input_file: sys.stderr.write("Please either specify -i or -q options\n") sys.exit(1) if input_file and not os.path.exists(input_file): - sys.stderr.write('File %s does not exist\n' % input_file) + sys.stderr.write("File %s does not exist\n" % input_file) sys.exit(1) if output_file and os.path.exists(output_file): - sys.stderr.write('File %s already exists\n' % output_file) + sys.stderr.write("File %s already exists\n" % output_file) sys.exit(1) request_parameters = None if args.p: - request_parameters = {param[0]: param[1] for param in [param.split("=") for param in args.p]} - api = VarSomeAPIClient(api_key) + request_parameters = { + param[0]: param[1] for param in [param.split("=") for param in args.p] + } + api = VarSomeAPIClient(api_key, api_url=api_url) if query: if len(query) == 1: - result = api.lookup(query[0], params=request_parameters, ref_genome=ref_genome) + result = api.lookup( + query[0], params=request_parameters, ref_genome=ref_genome + ) else: if api_key is None: sys.exit("You need to pass an api key to perform batch requests") - result = api.batch_lookup(query, params=request_parameters, ref_genome=ref_genome) + result = api.batch_lookup( + query, params=request_parameters, ref_genome=ref_genome + ) + result = list(result) if output_file: - write_f = open(output_file, 'w') - json.dump(result, write_f, indent=4, sort_keys=True) + with open(output_file, "w") as fp: + json.dump(result, fp, indent=4, sort_keys=True) else: - sys.stdout.write(json.dumps(result, indent=4, sort_keys=True) if result else "No result") + sys.stdout.write( + json.dumps(result, indent=4, sort_keys=True) if result else "No result" + ) sys.stdout.write("\n") sys.exit(0) with open(input_file) as f: variants = f.read().splitlines() if variants: if len(variants) > 1000: - sys.stdout.write('Too many variants.. Consider using annotate_vcf instead\n') + sys.stdout.write( + "Too many variants.. Consider using annotate_vcf instead\n" + ) sys.stdout.flush() - write_f = None - if output_file: - write_f = open(output_file, 'w') try: if api_key is None: - sys.stdout.write('Without an API key, variants will be annotated one a time, ' - 'causing a 429 too many requests error after some time\n') + sys.stdout.write( + "Without an API key, variants will be annotated one a time, " + "causing a 429 too many requests error after some time\n" + ) sys.stdout.flush() results = [] for variant in variants: - result = api.lookup(variant, params=request_parameters, ref_genome=ref_genome) + result = api.lookup( + variant, params=request_parameters, ref_genome=ref_genome + ) if not result: - result = {'error': 'Could not fetch annotations for %s' % variant} + result = { + "error": "Could not fetch annotations for %s" % variant + } results.append(result) - if write_f is not None: - json.dump(results, write_f, indent=4, sort_keys=True) + if output_file: + with open(output_file, "w") as fp: + json.dump(results, fp, indent=4, sort_keys=True) else: - sys.stdout.write(json.dumps(result, indent=4, sort_keys=True) if result else "No result") + sys.stdout.write( + json.dumps(results, indent=4, sort_keys=True) + if results + else "No result" + ) sys.stdout.write("\n") else: - result = api.batch_lookup(variants, params=request_parameters, ref_genome=ref_genome) - if write_f is not None: - json.dump(result, write_f, indent=4, sort_keys=True) + result = api.batch_lookup( + variants, params=request_parameters, ref_genome=ref_genome + ) + result = list(result) + if output_file: + with open(output_file, "w") as fp: + json.dump(result, fp, indent=4, sort_keys=True) else: - sys.stdout.write(json.dumps(result, indent=4, sort_keys=True) if result else "No result") + sys.stdout.write( + json.dumps(result, indent=4, sort_keys=True) + if result + else "No result" + ) sys.stdout.write("\n") except Exception as e: - # several things might occur. This is to broad, but lets not keep open file handles + # several things might occur. This is too broad, sys.stderr.write(str(e)) sys.stderr.write("\n") - if write_f is not None: - write_f.close() sys.exit(1) else: - sys.stderr.write('No variants found in file %s\n' % input_file) + sys.stderr.write("No variants found in file %s\n" % input_file) sys.exit(1) if __name__ == "__main__": - annotate_variant(sys.argv[1:]) + annotate_variant() diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 3480374..0000000 --- a/setup.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[bdist_wheel] -universal=1 \ No newline at end of file diff --git a/setup.py b/setup.py index c77e73f..611a32a 100755 --- a/setup.py +++ b/setup.py @@ -1,47 +1,48 @@ -from setuptools import setup, find_packages import sys from os import path -VERSION = (1, 1, '0b1') +from setuptools import setup, find_packages + +VERSION = (1, 2, "0b1") __version__ = VERSION -__versionstr__ = '.'.join(map(str, VERSION)) +__versionstr__ = ".".join(map(str, VERSION)) here = path.abspath(path.dirname(__file__)) installation_requirements = [ - 'requests>=2.0.0, <3.0.0', - 'PyVCF>=0.6.8', - 'jsonmodels>=2.2' + "requests>=2.0.0, <3.0.0", + "PyVCF3>=1.0.0", + "jsonmodels>=2.2", ] if sys.version_info < (3, 4): - installation_requirements.extend(['asyncio', 'unittest2']) + installation_requirements.extend(["asyncio", "unittest2"]) setup( - name='varsome_api_client', + name="varsome_api_client", version=__versionstr__, - packages=find_packages(".", ), - scripts=['scripts/varsome_api_run.py', 'scripts/varsome_api_annotate_vcf.py'], - url='https://github.com/saphetor/varsome-api-client-python', - license='Apache License, Version 2.0', - test_suite='nose.collector', - tests_require=['nose'], - include_package_data=True, - package_data= { - '': ['*.vcf', '*.csv'], - }, - author='Saphetor S.A.', - author_email='support@saphetor.com', - description='A basic python api client implementation for https://api.varsome.com', + packages=find_packages( + ".", + ), + scripts=["scripts/varsome_api_run.py", "scripts/varsome_api_annotate_vcf.py"], + url="https://github.com/saphetor/varsome-api-client-python", + license="Apache License, Version 2.0", + author="Saphetor S.A.", + author_email="support@saphetor.com", + description="A basic python api client implementation for https://api.varsome.com", classifiers=[ - 'Development Status :: 4 - Beta', - 'Intended Audience :: Developers', - 'Intended Audience :: Science/Research', - 'Operating System :: OS Independent', - 'License :: OSI Approved :: Apache License', - 'Programming Language :: Python :: 3.3', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Topic :: Scientific/Engineering :: Bio-Informatics', + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "Operating System :: OS Independent", + "License :: OSI Approved :: Apache License", + "Programming Language :: Python :: 3.3", + "Programming Language :: Python :: 3.4", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Topic :: Scientific/Engineering :: Bio-Informatics", ], install_requires=installation_requirements, - python_requires='>=3.3', + python_requires=">=3.3", ) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/varsome_api/tests/test_client.py b/tests/test_client.py similarity index 64% rename from varsome_api/tests/test_client.py rename to tests/test_client.py index 8f736f4..bfcf0ed 100644 --- a/varsome_api/tests/test_client.py +++ b/tests/test_client.py @@ -17,30 +17,26 @@ import unittest from tempfile import NamedTemporaryFile + try: unittest.TestCase.subTest except AttributeError: import unittest2 as unittest -import vcf -from vcf.parser import _Info + +from vcf.parser import _Info, _encode_type from varsome_api.client import VarSomeAPIClient, VarSomeAPIException from varsome_api.models.variant import AnnotatedVariant -from varsome_api.vcf import VCFAnnotator - -__author__ = "ckopanos" +from varsome_api.vcf import VCFAnnotator as BaseVCFAnnotator, vcf_reader - -API_KEY = os.getenv('VARSOME_API_KEY', None) +API_KEY = os.getenv("VARSOME_API_KEY", None) BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -VARIANTS_CSV_FILE = os.path.join(BASE_DIR, 'tests', 'variants.csv') -VARIANTS_VCF_FILE = os.path.join(BASE_DIR, 'tests', 'variants.vcf') - +VARIANTS_CSV_FILE = os.path.join(BASE_DIR, "tests", "variants.csv") +VARIANTS_VCF_FILE = os.path.join(BASE_DIR, "tests", "variants.vcf") class TestApiClient(unittest.TestCase): - - def __init__(self, methodName='runTest'): + def __init__(self, methodName="runTest"): super().__init__(methodName) self.client = VarSomeAPIClient(API_KEY) with open(VARIANTS_CSV_FILE) as f: @@ -59,7 +55,7 @@ def test_schema(self): def test_404(self): """Check we can raise VarSomeAPIException""" with self.assertRaises(VarSomeAPIException) as ve: - self.client.lookup('chrM:410:A:T', ref_genome='hg64') + self.client.lookup("chrM:410:A:T", ref_genome="hg64") test_exception = ve.exception self.assertEqual(test_exception.status, 404) self.client.session.close() @@ -68,31 +64,39 @@ def test_get_lookup_hg19(self): """Check we can do plain get requests""" for i, variant in enumerate(self.variants_to_lookup): with self.subTest(i=i): - result = self.client.lookup(variant, ref_genome='hg19', params={'add-all-data': 1, - 'expand-pubmed-articles': 0}) + result = self.client.lookup( + variant, + ref_genome="hg19", + params={"add-all-data": 1, "expand-pubmed-articles": 0}, + ) self.assertIsNotNone(result) - self.assertTrue('variant_id' in result) + self.assertTrue("variant_id" in result) self.client.session.close() def test_batch_lookup_hg19(self): """Check we can do batch requests""" - results = self.client.batch_lookup(self.variants_to_lookup, ref_genome='hg19', - params={'add-all-data': 1, 'expand-pubmed-articles': 0}, - raise_exceptions=True) + results = self.client.batch_lookup( + self.variants_to_lookup, + ref_genome="hg19", + params={"add-all-data": 1, "expand-pubmed-articles": 0}, + raise_exceptions=True, + ) self.assertEqual(len(results), len(self.variants_to_lookup)) self.client.session.close() class TestApiResponse(unittest.TestCase): - - def __init__(self, methodName='runTest'): + def __init__(self, methodName="runTest"): super().__init__(methodName) client = VarSomeAPIClient(API_KEY, max_variants_per_batch=1000) with open(VARIANTS_CSV_FILE) as f: self.variants_to_lookup = f.read().splitlines() - self.results = client.batch_lookup(self.variants_to_lookup, ref_genome='hg19', - params={'add-all-data': 1, 'expand-pubmed-articles': 0}, - raise_exceptions=True) + self.results = client.batch_lookup( + self.variants_to_lookup, + ref_genome="hg19", + params={"add-all-data": 1, "expand-pubmed-articles": 0}, + raise_exceptions=True, + ) client.session.close() def test_result_is_not_none(self): @@ -105,51 +109,57 @@ def test_result_has_variant_id(self): """Check result includes variant_id""" for i, result in enumerate(self.results): with self.subTest(i=i): - self.assertTrue('variant_id' in result) + self.assertTrue("variant_id" in result) def test_variant_chromosome_result_chromosome(self): """Check that the requested chromosome is the same as the one returned""" for i, result in enumerate(self.results): with self.subTest(i=i): chromosome = self.variants_to_lookup[i].split(":")[0] - self.assertEqual(result['chromosome'], chromosome) + self.assertEqual(result["chromosome"], chromosome) def test_result_wrapper(self): """Check that we can wrap the result in a json model""" for i, result in enumerate(self.results): with self.subTest(i=i): annotated_variant = AnnotatedVariant(**result) - self.assertEqual(annotated_variant.chromosome, result['chromosome']) + self.assertEqual(annotated_variant.chromosome, result["chromosome"]) self.assertIsNotNone(annotated_variant.pos) - self.assertEqual(result['pos'], annotated_variant.pos) - + self.assertEqual(result["pos"], annotated_variant.pos) -class TestVCFAnnotator(VCFAnnotator): - def annotate_record(self, record, variant_result): - record.INFO['gnomad_genomes_AN'] = variant_result.gnomad_genomes_an +class VCFAnnotator(BaseVCFAnnotator): + def annotate_record(self, record, variant_result, original_variant): + record.INFO["gnomad_genomes_AN"] = variant_result.gnomad_genomes_an return record def add_vcf_header_info(self, vcf_template): - vcf_template.infos['gnomad_genomes_AN'] = _Info('gnomad_genomes_AN', '.', 'Integer', - 'GnomAD genomes allele number value', None, None) + vcf_template.infos["gnomad_genomes_AN"] = _Info( + "gnomad_genomes_AN", + ".", + "Integer", + "GnomAD genomes allele number value", + None, + None, + _encode_type("Integer"), + ) -class TestVcfAnnotator(unittest.TestCase): - def __init__(self, methodName='runTest'): +class TestVcfAnnotator(unittest.TestCase): + def __init__(self, methodName="runTest"): super().__init__(methodName) - self.annotator = TestVCFAnnotator(API_KEY) + self.annotator = VCFAnnotator(API_KEY) def test_annotate_vcf(self): """Check that we can annotate a vcf file""" output_vcf_file = NamedTemporaryFile(delete=False) output_vcf_file.close() self.annotator.annotate(VARIANTS_VCF_FILE, output_vcf_file.name) - vcf_reader = vcf.Reader(filename=output_vcf_file.name, strict_whitespace=True) - self.assertTrue('gnomad_genomes_AN' in vcf_reader.infos) - for i, record in enumerate(vcf_reader): - with self.subTest(i=i): - self.assertTrue('gnomad_genomes_AN' in record.INFO) - vcf_reader._reader.close() + with vcf_reader( + filename=output_vcf_file.name, strict_whitespace=True + ) as reader: + self.assertTrue("gnomad_genomes_AN" in reader.infos) + for i, record in enumerate(reader): + with self.subTest(i=i): + self.assertTrue("gnomad_genomes_AN" in record.INFO) self.annotator.session.close() - diff --git a/varsome_api/tests/variants.csv b/tests/variants.csv similarity index 99% rename from varsome_api/tests/variants.csv rename to tests/variants.csv index 22266e1..e3ff866 100644 --- a/varsome_api/tests/variants.csv +++ b/tests/variants.csv @@ -2997,4 +2997,4 @@ chrX:92410936:G:A chr16:87584748:G:A chrX:70348344:G:A chr8:90995019:C:T -chr12:88787112:C:T \ No newline at end of file +chr12:88787112:C:T diff --git a/varsome_api/tests/variants.vcf b/tests/variants.vcf similarity index 100% rename from varsome_api/tests/variants.vcf rename to tests/variants.vcf diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..98e5c3b --- /dev/null +++ b/tox.ini @@ -0,0 +1,9 @@ +# content of: tox.ini , put in same dir as setup.py +[tox] +envlist = py33,py34,py35,py36,py37,py38,py39,py310 + +[testenv] +deps = pytest +passenv = VARSOME_API_KEY +commands = + pytest diff --git a/varsome_api/client.py b/varsome_api/client.py index 2789546..09ca33c 100755 --- a/varsome_api/client.py +++ b/varsome_api/client.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging import asyncio import concurrent.futures -import re +import logging import os +import re +from itertools import chain + import requests from requests.exceptions import HTTPError, Timeout, ConnectionError, RequestException @@ -25,9 +27,9 @@ class VarSomeAPIException(Exception): ERROR_CODES = { 400: "Bad request. A parameter you have passed is not valid, or something in your request is wrong", 401: "Not Authorized: either you need to provide authentication credentials, or the credentials provided aren't" - " valid.", + " valid.", 403: "Bad Request: your request is invalid, and we'll return an error message that tells you why. This is the " - "status code returned if you've exceeded the rate limit (see below).", + "status code returned if you've exceeded the rate limit (see below).", 404: "Not Found: either you're requesting an invalid URI or the resource in question doesn't exist", 500: "Internal Server Error: we did something wrong.", 501: "Not implemented.", @@ -43,63 +45,80 @@ def __init__(self, status, response=None): def __str__(self): return "%s (%s)" % ( self.status, - self.ERROR_CODES.get(self.status, 'Unknown error.') if self.response is None else self.response) + self.ERROR_CODES.get(self.status, "Unknown error.") + if self.response is None + else self.response, + ) def __repr__(self): return "%s(status=%s)" % (self.__class__.__name__, self.status) class VarSomeAPIClientBase(object): - _api_url = 'https://api.varsome.com' - _accepted_methods = ('GET', 'POST') + _api_url = "https://api.varsome.com" + _accepted_methods = ("GET", "POST") - def __init__(self, api_key=None, logger=None): + def __init__(self, api_key=None, logger=None, api_url=None): if logger is None: BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + ) ch = logging.StreamHandler() ch.setLevel(logging.DEBUG) ch.setFormatter(formatter) logger.addHandler(ch) + if api_url is not None: + self._api_url = api_url self.logger = logger self.api_key = api_key - self._headers = {'Accept': 'application/json', 'user-agent': 'VarSomeApiClientPython/2.0'} + self._headers = { + "Accept": "application/json", + "user-agent": "VarSomeApiClientPython/2.0", + } if self.api_key is not None: - self._headers['Authorization'] = "Token " + self.api_key + self._headers["Authorization"] = "Token " + self.api_key self.session = requests.Session() self.session.headers.update(self._headers) def _make_request(self, path, method="GET", params=None, json_data=None): if method not in self._accepted_methods: - raise VarSomeAPIException('', "Unsupported method %s" % method) + raise VarSomeAPIException("", "Unsupported method %s" % method) try: if method == "GET": r = self.session.get(self._api_url + path, params=params, stream=True) if method == "POST": if json_data is None: raise RuntimeError("You need to provide a post request body") - r = self.session.post(self._api_url + path, params=params, json=json_data, - headers={'Content-Type': 'application/json'}, stream=True) - self.logger.info('Time between request and response %s' % r.elapsed) - self.logger.info('Content length %s' % len(r.content)) + r = self.session.post( + self._api_url + path, + params=params, + json=json_data, + headers={"Content-Type": "application/json"}, + stream=True, + ) + self.logger.info("Time between request and response %s" % r.elapsed) + self.logger.info("Content length %s" % len(r.content)) r.raise_for_status() return r except HTTPError as e: response = e.response if response.status_code in VarSomeAPIException.ERROR_CODES: error_message = "Unexpected error" - if r.headers['Content-Type'] == "application/json": + if r.headers["Content-Type"] == "application/json": error_message = response.json().get("detail", None) raise VarSomeAPIException(response.status_code, error_message) - raise VarSomeAPIException('', "Unknown http error %s" % e) + raise VarSomeAPIException("", "Unknown http error %s" % e) except Timeout as e: - raise VarSomeAPIException('', "Request timed out %s" % e) + raise VarSomeAPIException("", "Request timed out %s" % e) except ConnectionError as e: - raise VarSomeAPIException('', "Connection failure or connection refused %s" % e) + raise VarSomeAPIException( + "", "Connection failure or connection refused %s" % e + ) except RequestException as e: - raise VarSomeAPIException('', "Unknown error %s" % e) + raise VarSomeAPIException("", "Unknown error %s" % e) def get(self, path, params=None): response = self._make_request(path, "GET", params=params) @@ -108,13 +127,21 @@ def get(self, path, params=None): def post(self, path, params=None, json_data=None, raise_exceptions=True): # handle api errors in batch requests. try: - response = self._make_request(path, "POST", params=params, json_data=json_data) + response = self._make_request( + path, "POST", params=params, json_data=json_data + ) return response.json() except VarSomeAPIException as e: if raise_exceptions: raise e self.logger.error(e) - return {'error': str(e)} + return [ + { + "error": "Could not annotate variant %s because " + "request failed with %s" % (variant, e) + } + for variant in json_data["variants"] + ] class VarSomeAPIClient(VarSomeAPIClientBase): @@ -123,18 +150,20 @@ class VarSomeAPIClient(VarSomeAPIClientBase): ref_genome_lookup_path = lookup_path + "/%s" batch_lookup_path = "/lookup/batch/%s" - def __init__(self, api_key=None, max_variants_per_batch=200): - super(VarSomeAPIClient, self).__init__(api_key) + def __init__( + self, api_key=None, logger=None, api_url=None, max_variants_per_batch=200 + ): + super(VarSomeAPIClient, self).__init__(api_key, logger, api_url) self.max_variants_per_batch = max_variants_per_batch - - def query_is_variant_id(self, query): + @staticmethod + def query_is_variant_id(query): """ Query may be a variat identifier developed by Saphetor :param query: :return: """ - return re.search(r'^\d{20}$', str(query)) + return re.search(r"^\d{20}$", str(query)) def schema(self): return self.get(self.schema_lookup_path) @@ -153,7 +182,14 @@ def lookup(self, query, params=None, ref_genome=None): url = self.ref_genome_lookup_path % (query, ref_genome) return self.get(url, params=params) - def batch_lookup(self, variants, params=None, ref_genome='hg19', max_threads=3, raise_exceptions=False): + def batch_lookup( + self, + variants, + params=None, + ref_genome="hg19", + max_threads=3, + raise_exceptions=False, + ): """ :param variants: list of variant representations @@ -167,27 +203,30 @@ def batch_lookup(self, variants, params=None, ref_genome='hg19', max_threads=3, :return: list of dictionaries with annotations per variant refer to https://api.varsome.com/lookup/schema for dictionary properties """ - results = [] @asyncio.coroutine - def batch(executor): - loop = asyncio.get_event_loop() + def batch(batch_executor): + batch_loop = asyncio.get_event_loop() futures = [ - loop.run_in_executor( - executor, + batch_loop.run_in_executor( + batch_executor, self.post, - self.batch_lookup_path % ref_genome, params, {'variants': queries}, raise_exceptions + self.batch_lookup_path % ref_genome, + params, + {"variants": queries}, + raise_exceptions, ) - for queries in [variants[x:x + self.max_variants_per_batch] for x in range(0, len(variants), - self.max_variants_per_batch)] + for queries in [ + variants[x : x + self.max_variants_per_batch] + for x in range(0, len(variants), self.max_variants_per_batch) + ] ] responses = yield from asyncio.gather(*futures) - for response in responses: - results.extend(response) + return responses + # Create a limited thread pool. executor = concurrent.futures.ThreadPoolExecutor( - max_workers=max_threads, + max_workers=max_threads, ) loop = asyncio.get_event_loop() - loop.run_until_complete(batch(executor)) - return results + return list(chain.from_iterable(loop.run_until_complete(batch(executor)))) diff --git a/varsome_api/models/__init__.py b/varsome_api/models/__init__.py index ee2f4d4..139597f 100755 --- a/varsome_api/models/__init__.py +++ b/varsome_api/models/__init__.py @@ -1,3 +1,2 @@ -__author__ = "ckopanos" diff --git a/varsome_api/models/elements/__init__.py b/varsome_api/models/elements/__init__.py index b1f5852..c87dc37 100755 --- a/varsome_api/models/elements/__init__.py +++ b/varsome_api/models/elements/__init__.py @@ -1,18 +1,16 @@ -__author__ = "ckopanos" - -from .transcript import * +from .acmg import * from .broad import * -from .gnomad import * -from .thousand_genomes import * +from .dann import * +from .dbnsfp import * from .gerp import * +from .gnomad import * +from .gwas import * +from .iarc import * +from .icgc import * from .isb import * -from .dbnsfp import * -from .dann import * from .ncbi import * from .sanger import * -from .icgc import * -from .iarc import * +from .thousand_genomes import * +from .transcript import * from .uniprot import * from .wustl import * -from .gwas import * -from .acmg import * diff --git a/varsome_api/models/elements/acmg.py b/varsome_api/models/elements/acmg.py index 7650b8d..4d13a9c 100644 --- a/varsome_api/models/elements/acmg.py +++ b/varsome_api/models/elements/acmg.py @@ -14,14 +14,16 @@ from jsonmodels import models, fields -__author__ = "ckopanos" - class ACMGClassification(models.Base): met_criteria = fields.BoolField() name = fields.StringField(help_text="ACMG Classification Name") - user_explain = fields.ListField(items_types=(str,), help_text="Criteria explanation", required=False, - nullable=True) + user_explain = fields.ListField( + items_types=(str,), + help_text="Criteria explanation", + required=False, + nullable=True, + ) class ACMGRule(models.Base): @@ -31,12 +33,21 @@ class ACMGRule(models.Base): class ACMGVerdict(models.Base): - classifications = fields.ListField(items_types=(str,), help_text="Classification names", required=False, - nullable=True) + classifications = fields.ListField( + items_types=(str,), + help_text="Classification names", + required=False, + nullable=True, + ) ACMG_rules = fields.EmbeddedField(ACMGRule) class ACMG(models.Base): - classifications = fields.ListField(required=False, items_types=(ACMGClassification,), - help_text="ACMG Classifications") - verdict = fields.EmbeddedField(ACMGVerdict, nullable=True, required=False, help_text="ACMG Verdict") + classifications = fields.ListField( + required=False, + items_types=(ACMGClassification,), + help_text="ACMG Classifications", + ) + verdict = fields.EmbeddedField( + ACMGVerdict, nullable=True, required=False, help_text="ACMG Verdict" + ) diff --git a/varsome_api/models/elements/broad.py b/varsome_api/models/elements/broad.py index 6c994f9..d1a0bf3 100755 --- a/varsome_api/models/elements/broad.py +++ b/varsome_api/models/elements/broad.py @@ -14,8 +14,6 @@ from jsonmodels import models, fields -__author__ = "ckopanos" - class ExAC(models.Base): version = fields.StringField(help_text="Version") @@ -24,55 +22,173 @@ class ExAC(models.Base): ac_adj = fields.FloatField(help_text="Allele Count", required=False, nullable=True) an_adj = fields.FloatField(help_text="Allele Number", required=False, nullable=True) af = fields.FloatField(help_text="Allele Frequency", required=False, nullable=True) - ac_afr = fields.IntField(help_text="Allele Count African", required=False, nullable=True) - ac_amr = fields.IntField(help_text="Allele Count American", required=False, nullable=True) - ac_asj = fields.IntField(help_text="Allele Count Ashkenazi Jewish", required=False, nullable=True) - ac_eas = fields.IntField(help_text="Allele Count East Asian", required=False, nullable=True) - ac_fin = fields.IntField(help_text="Allele Count European (Finnish)", required=False, nullable=True) - ac_nfe = fields.IntField(help_text="Allele Count European (Non-Finnish)", required=False, nullable=True) - ac_oth = fields.IntField(help_text="Allele Count Other", required=False, nullable=True) - ac_sas = fields.IntField(help_text="Allele Count South Asian", required=False, nullable=True) - ac_male = fields.IntField(help_text="Allele Count Male", required=False, nullable=True) - ac_female = fields.IntField(help_text="Allele Count Female", required=False, nullable=True) - hom = fields.IntField(help_text="Number of Homozygotes", required=False, nullable=True) - hemi = fields.IntField(help_text="Number of Hemizygotes", required=False, nullable=True) - ac_hom = fields.FloatField(help_text="Number of Homozygotes", required=False, nullable=True) - ac_hemi = fields.FloatField(help_text="Number of Hemizygotes", required=False, nullable=True) - an_afr = fields.IntField(help_text="Allele Number African", required=False, nullable=True) - an_amr = fields.IntField(help_text="Allele Number American", required=False, nullable=True) - an_asj = fields.IntField(help_text="Allele Number Ashkenazi Jewish", required=False, nullable=True) - an_eas = fields.IntField(help_text="Allele Number East Asian", required=False, nullable=True) - an_fin = fields.IntField(help_text="Allele Number European (Finnish)", required=False, nullable=True) - an_nfe = fields.IntField(help_text="Allele Number European (Non-Finnish)", required=False, nullable=True) - an_oth = fields.IntField(help_text="Allele Number Other", required=False, nullable=True) - an_sas = fields.IntField(help_text="Allele Number South Asian", required=False, nullable=True) - an_male = fields.IntField(help_text="Allele Number Male", required=False, nullable=True) - an_female = fields.IntField(help_text="Allele Number Female", required=False, nullable=True) - hom_afr = fields.IntField(help_text="Number of Homozygotes African", required=False, nullable=True) - hom_amr = fields.IntField(help_text="Number of Homozygotes American", required=False, nullable=True) - hom_asj = fields.IntField(help_text="Number of Homozygotes Ashkenazi Jewish", required=False, nullable=True) - hom_eas = fields.IntField(help_text="Number of Homozygotes East Asian", required=False, nullable=True) - hom_fin = fields.IntField(help_text="Number of Homozygotes European (Finnish)", required=False, nullable=True) - hom_nfe = fields.IntField(help_text="Number of Homozygotes European (Non-Finnish)", required=False, nullable=True) - hom_oth = fields.IntField(help_text="Number of Homozygotes Other", required=False, nullable=True) - hom_sas = fields.IntField(help_text="Number of Homozygotes South Asian", required=False, nullable=True) - hom_male = fields.IntField(help_text="Number of Homozygotes Male", required=False, nullable=True) - hom_female = fields.IntField(help_text="Number of Homozygotes Female", required=False, nullable=True) - hemi_afr = fields.IntField(help_text="Number of Hemizygotes African", required=False, nullable=True) - hemi_amr = fields.IntField(help_text="Number of Hemizygotes American", required=False, nullable=True) - hemi_asj = fields.IntField(help_text="Number of Hemizygotes Ashkenazi Jewish", required=False, nullable=True) - hemi_eas = fields.IntField(help_text="Number of Hemizygotes East Asian", required=False, nullable=True) - hemi_fin = fields.IntField(help_text="Number of Hemizygotes European (Finnish)", required=False, nullable=True) - hemi_nfe = fields.IntField(help_text="Number of Hemizygotes European (Non-Finnish)", required=False, nullable=True) - hemi_oth = fields.IntField(help_text="Number of Hemizygotes Other", required=False, nullable=True) - hemi_sas = fields.IntField(help_text="Number of Hemizygotes South Asian", required=False, nullable=True) - af_afr = fields.FloatField(help_text="Allele Frequency African", required=False, nullable=True) - af_amr = fields.FloatField(help_text="Allele Frequency American", required=False, nullable=True) - af_asj = fields.FloatField(help_text="Allele Frequency Ashkenazi Jewish", required=False, nullable=True) - af_eas = fields.FloatField(help_text="Allele Frequency East Asian", required=False, nullable=True) - af_fin = fields.FloatField(help_text="Allele Frequency European (Finnish)", required=False, nullable=True) - af_nfe = fields.FloatField(help_text="Allele Frequency European (Non-Finnish)", required=False, nullable=True) - af_oth = fields.FloatField(help_text="Allele Frequency Other", required=False, nullable=True) - af_sas = fields.FloatField(help_text="Allele Frequency South Asian", required=False, nullable=True) - af_male = fields.FloatField(help_text="Allele Frequency Male", required=False, nullable=True) - af_female = fields.FloatField(help_text="Allele Frequency Female", required=False, nullable=True) \ No newline at end of file + ac_afr = fields.IntField( + help_text="Allele Count African", required=False, nullable=True + ) + ac_amr = fields.IntField( + help_text="Allele Count American", required=False, nullable=True + ) + ac_asj = fields.IntField( + help_text="Allele Count Ashkenazi Jewish", required=False, nullable=True + ) + ac_eas = fields.IntField( + help_text="Allele Count East Asian", required=False, nullable=True + ) + ac_fin = fields.IntField( + help_text="Allele Count European (Finnish)", required=False, nullable=True + ) + ac_nfe = fields.IntField( + help_text="Allele Count European (Non-Finnish)", required=False, nullable=True + ) + ac_oth = fields.IntField( + help_text="Allele Count Other", required=False, nullable=True + ) + ac_sas = fields.IntField( + help_text="Allele Count South Asian", required=False, nullable=True + ) + ac_male = fields.IntField( + help_text="Allele Count Male", required=False, nullable=True + ) + ac_female = fields.IntField( + help_text="Allele Count Female", required=False, nullable=True + ) + hom = fields.IntField( + help_text="Number of Homozygotes", required=False, nullable=True + ) + hemi = fields.IntField( + help_text="Number of Hemizygotes", required=False, nullable=True + ) + ac_hom = fields.FloatField( + help_text="Number of Homozygotes", required=False, nullable=True + ) + ac_hemi = fields.FloatField( + help_text="Number of Hemizygotes", required=False, nullable=True + ) + an_afr = fields.IntField( + help_text="Allele Number African", required=False, nullable=True + ) + an_amr = fields.IntField( + help_text="Allele Number American", required=False, nullable=True + ) + an_asj = fields.IntField( + help_text="Allele Number Ashkenazi Jewish", required=False, nullable=True + ) + an_eas = fields.IntField( + help_text="Allele Number East Asian", required=False, nullable=True + ) + an_fin = fields.IntField( + help_text="Allele Number European (Finnish)", required=False, nullable=True + ) + an_nfe = fields.IntField( + help_text="Allele Number European (Non-Finnish)", required=False, nullable=True + ) + an_oth = fields.IntField( + help_text="Allele Number Other", required=False, nullable=True + ) + an_sas = fields.IntField( + help_text="Allele Number South Asian", required=False, nullable=True + ) + an_male = fields.IntField( + help_text="Allele Number Male", required=False, nullable=True + ) + an_female = fields.IntField( + help_text="Allele Number Female", required=False, nullable=True + ) + hom_afr = fields.IntField( + help_text="Number of Homozygotes African", required=False, nullable=True + ) + hom_amr = fields.IntField( + help_text="Number of Homozygotes American", required=False, nullable=True + ) + hom_asj = fields.IntField( + help_text="Number of Homozygotes Ashkenazi Jewish", + required=False, + nullable=True, + ) + hom_eas = fields.IntField( + help_text="Number of Homozygotes East Asian", required=False, nullable=True + ) + hom_fin = fields.IntField( + help_text="Number of Homozygotes European (Finnish)", + required=False, + nullable=True, + ) + hom_nfe = fields.IntField( + help_text="Number of Homozygotes European (Non-Finnish)", + required=False, + nullable=True, + ) + hom_oth = fields.IntField( + help_text="Number of Homozygotes Other", required=False, nullable=True + ) + hom_sas = fields.IntField( + help_text="Number of Homozygotes South Asian", required=False, nullable=True + ) + hom_male = fields.IntField( + help_text="Number of Homozygotes Male", required=False, nullable=True + ) + hom_female = fields.IntField( + help_text="Number of Homozygotes Female", required=False, nullable=True + ) + hemi_afr = fields.IntField( + help_text="Number of Hemizygotes African", required=False, nullable=True + ) + hemi_amr = fields.IntField( + help_text="Number of Hemizygotes American", required=False, nullable=True + ) + hemi_asj = fields.IntField( + help_text="Number of Hemizygotes Ashkenazi Jewish", + required=False, + nullable=True, + ) + hemi_eas = fields.IntField( + help_text="Number of Hemizygotes East Asian", required=False, nullable=True + ) + hemi_fin = fields.IntField( + help_text="Number of Hemizygotes European (Finnish)", + required=False, + nullable=True, + ) + hemi_nfe = fields.IntField( + help_text="Number of Hemizygotes European (Non-Finnish)", + required=False, + nullable=True, + ) + hemi_oth = fields.IntField( + help_text="Number of Hemizygotes Other", required=False, nullable=True + ) + hemi_sas = fields.IntField( + help_text="Number of Hemizygotes South Asian", required=False, nullable=True + ) + af_afr = fields.FloatField( + help_text="Allele Frequency African", required=False, nullable=True + ) + af_amr = fields.FloatField( + help_text="Allele Frequency American", required=False, nullable=True + ) + af_asj = fields.FloatField( + help_text="Allele Frequency Ashkenazi Jewish", required=False, nullable=True + ) + af_eas = fields.FloatField( + help_text="Allele Frequency East Asian", required=False, nullable=True + ) + af_fin = fields.FloatField( + help_text="Allele Frequency European (Finnish)", required=False, nullable=True + ) + af_nfe = fields.FloatField( + help_text="Allele Frequency European (Non-Finnish)", + required=False, + nullable=True, + ) + af_oth = fields.FloatField( + help_text="Allele Frequency Other", required=False, nullable=True + ) + af_sas = fields.FloatField( + help_text="Allele Frequency South Asian", required=False, nullable=True + ) + af_male = fields.FloatField( + help_text="Allele Frequency Male", required=False, nullable=True + ) + af_female = fields.FloatField( + help_text="Allele Frequency Female", required=False, nullable=True + ) diff --git a/varsome_api/models/elements/dann.py b/varsome_api/models/elements/dann.py index 30d7a73..20815e3 100755 --- a/varsome_api/models/elements/dann.py +++ b/varsome_api/models/elements/dann.py @@ -14,9 +14,9 @@ from jsonmodels import models, fields -__author__ = "ckopanos" - class DannSNVs(models.Base): version = fields.StringField(help_text="Version") - dann_score = fields.FloatField(help_text="DANN Score", required=False, nullable=True) + dann_score = fields.FloatField( + help_text="DANN Score", required=False, nullable=True + ) diff --git a/varsome_api/models/elements/dbnsfp.py b/varsome_api/models/elements/dbnsfp.py index d0dd066..dc88b99 100755 --- a/varsome_api/models/elements/dbnsfp.py +++ b/varsome_api/models/elements/dbnsfp.py @@ -13,113 +13,211 @@ # limitations under the License. from jsonmodels import models, fields -from varsome_api.models.fields import NullableItemListField -__author__ = "ckopanos" +from varsome_api.models.fields import NullableItemListField class DbNSFP(models.Base): version = fields.StringField(help_text="Version") - mutationtaster_pred = NullableItemListField(items_types=(str,), help_text="MutationTaster Prediction", - required=False) - mutationtaster_score = NullableItemListField(items_types=(float,), - help_text="MutationTaster Accuracy", required=False, nullable=True) - sift_score = NullableItemListField(items_types=(float,), help_text="SIFT score", required=False, nullable=True) - sift_prediction = fields.StringField(help_text="SIFT prediction", required=False, nullable=True) - phylop100way_vertebrate = NullableItemListField(items_types=(float,), help_text="phyloP100way vertebrate", - required=False) - phylop46way_placental = NullableItemListField(items_types=(float,), help_text="phyloP46way placental", - required=False) - phylop46way_primate = NullableItemListField(items_types=(float,), help_text="phyloP46way primate", - required=False) - mutationtaster_converted_rankscore = NullableItemListField(items_types=(float,), - help_text="MutationTaster converted rankscore", - required=False, nullable=True) - mutationassessor_pred = NullableItemListField(items_types=(str,), - help_text="MutationAssessor prediction", - required=False) - mutationassessor_score = NullableItemListField(items_types=(float,), help_text="MutationAssessor score", - required=False) - mutationassessor_score_rankscore = NullableItemListField(items_types=(float,), - help_text="MutationAssessor rankscore", required=False, - nullable=True) - fathmm_mkl_coding_pred = NullableItemListField(items_types=(str,), - help_text="FATHMM-MKL coding prediction", required=False, nullable=True) - fathmm_mkl_coding_score = NullableItemListField(items_types=(float,), help_text="FATHMM-MKL coding score", - required=False) - fathmm_mkl_coding_rankscore = NullableItemListField(items_types=(float,), - help_text="FATHMM-MKL coding rankscore", required=False, - nullable=True) - fathmm_pred = NullableItemListField(items_types=(str,), help_text="FATHMM prediction", required=False, nullable=True) - fathmm_score = NullableItemListField(items_types=(float,), help_text="FATHMM score", - required=False) - fathmm_converted_rankscore = NullableItemListField(items_types=(float,), help_text="FATHMM converted rankscore", - required=False, nullable=True) - sift_converted_rankscore = NullableItemListField(items_types=(float,), help_text="SIFT converted rankscore", - required=False) - metasvm_pred = NullableItemListField(items_types=(str,), help_text="MetaSVM prediction", - required=False) - metasvm_score = NullableItemListField(items_types=(float,), help_text="MetaSVM score", required=False, nullable=True) - metasvm_rankscore = NullableItemListField(items_types=(float,), help_text="MetaSVM rankscore", - required=False) - metalr_pred = NullableItemListField(items_types=(str,), help_text="MetalR prediction", required=False, nullable=True) - metalr_score = NullableItemListField(items_types=(float,), help_text="MetalR score", required=False, nullable=True) - metalr_rankscore = NullableItemListField(items_types=(float,), help_text="MetalR rankscore", required=False, - nullable=True) - provean_pred = NullableItemListField(items_types=(str,), help_text="Provean prediction", - required=False) - provean_score = NullableItemListField(items_types=(float,), help_text="Provean score", - required=False) - provean_converted_rankscore = NullableItemListField(items_types=(float,), - help_text="Provean converted rankscore", required=False, - nullable=True) - lrt_pred = NullableItemListField(items_types=(str,), help_text="LRT prediction", required=False, nullable=True) - lrt_score = NullableItemListField(items_types=(float,), help_text="LRT score", required=False, nullable=True) - lrt_converted_rankscore = NullableItemListField(items_types=(float,), help_text="LRT converted rankscore", - required=False) - lrt_omega = NullableItemListField(items_types=(float,), help_text="LRT Omega", required=False, nullable=True) - cadd_raw = NullableItemListField(items_types=(float,), help_text="CADD raw score", required=False, nullable=True) - cadd_raw_rankscore = NullableItemListField(items_types=(float,), help_text="CADD raw rankscore", - required=False) - cadd_phred = NullableItemListField(items_types=(float,), help_text="CADD phred", required=False, nullable=True) - gm12878_confidence_value = NullableItemListField(items_types=(float,), - help_text="GM12878 fitCons confidence value", required=False, - nullable=True) - gm12878_fitcons_score = NullableItemListField(items_types=(float,), help_text="GM12878 fitCons score", - required=False) - gm12878_fitcons_score_rankscore = NullableItemListField(items_types=(float,), - help_text="GM12878 fitCons rankscore", required=False, - nullable=True) - siphy_29way_logodds_rankscore = NullableItemListField(items_types=(float,), - help_text="SiPhy29way logOdds rankscore", required=False, - nullable=True) - siphy_29way_pi = NullableItemListField(items_types=(float,), - help_text="SiPhy29way pi", required=False, nullable=True) - phylop20way_mammalian = NullableItemListField(items_types=(float,), help_text="phyloP20way mammalian", - required=False) - phylop20way_mammalian_rankscore = NullableItemListField(items_types=(float,), - help_text="phyloP20way mammalian rankscore", required=False, - nullable=True) - phylop100way_vertebrate_rankscore = NullableItemListField(items_types=(float,), - help_text="phyloP100way vertebrate rankscore", required=False, - nullable=True) - phastcons20way_mammalian = NullableItemListField(items_types=(float,), help_text="phastCons20way mammalian", - required=False) - phastcons20way_mammalian_rankscore = NullableItemListField(items_types=(float,), - help_text="phastCons20way mammalian rankscore", - required=False, nullable=True) - phastcons100way_vertebrate = NullableItemListField(items_types=(float,), - help_text="phastCons100way vertebrate", required=False, nullable=True) - phastcons100way_vertebrate_rankscore = NullableItemListField(items_types=(float,), - help_text="phastCons100way vertebrate rankscore", - required=False, nullable=True) - vest3_score = NullableItemListField(items_types=(float,), help_text="VEST3 score", - required=False) - vest3_rankscore = NullableItemListField(items_types=(float,), - help_text="VEST3 rankscore", required=False, nullable=True) + mutationtaster_pred = NullableItemListField( + items_types=(str,), help_text="MutationTaster Prediction", required=False + ) + mutationtaster_score = NullableItemListField( + items_types=(float,), + help_text="MutationTaster Accuracy", + required=False, + nullable=True, + ) + sift_score = NullableItemListField( + items_types=(float,), help_text="SIFT score", required=False, nullable=True + ) + sift_prediction = fields.StringField( + help_text="SIFT prediction", required=False, nullable=True + ) + phylop100way_vertebrate = NullableItemListField( + items_types=(float,), help_text="phyloP100way vertebrate", required=False + ) + phylop46way_placental = NullableItemListField( + items_types=(float,), help_text="phyloP46way placental", required=False + ) + phylop46way_primate = NullableItemListField( + items_types=(float,), help_text="phyloP46way primate", required=False + ) + mutationtaster_converted_rankscore = NullableItemListField( + items_types=(float,), + help_text="MutationTaster converted rankscore", + required=False, + nullable=True, + ) + mutationassessor_pred = NullableItemListField( + items_types=(str,), help_text="MutationAssessor prediction", required=False + ) + mutationassessor_score = NullableItemListField( + items_types=(float,), help_text="MutationAssessor score", required=False + ) + mutationassessor_score_rankscore = NullableItemListField( + items_types=(float,), + help_text="MutationAssessor rankscore", + required=False, + nullable=True, + ) + fathmm_mkl_coding_pred = NullableItemListField( + items_types=(str,), + help_text="FATHMM-MKL coding prediction", + required=False, + nullable=True, + ) + fathmm_mkl_coding_score = NullableItemListField( + items_types=(float,), help_text="FATHMM-MKL coding score", required=False + ) + fathmm_mkl_coding_rankscore = NullableItemListField( + items_types=(float,), + help_text="FATHMM-MKL coding rankscore", + required=False, + nullable=True, + ) + fathmm_pred = NullableItemListField( + items_types=(str,), help_text="FATHMM prediction", required=False, nullable=True + ) + fathmm_score = NullableItemListField( + items_types=(float,), help_text="FATHMM score", required=False + ) + fathmm_converted_rankscore = NullableItemListField( + items_types=(float,), + help_text="FATHMM converted rankscore", + required=False, + nullable=True, + ) + sift_converted_rankscore = NullableItemListField( + items_types=(float,), help_text="SIFT converted rankscore", required=False + ) + metasvm_pred = NullableItemListField( + items_types=(str,), help_text="MetaSVM prediction", required=False + ) + metasvm_score = NullableItemListField( + items_types=(float,), help_text="MetaSVM score", required=False, nullable=True + ) + metasvm_rankscore = NullableItemListField( + items_types=(float,), help_text="MetaSVM rankscore", required=False + ) + metalr_pred = NullableItemListField( + items_types=(str,), help_text="MetalR prediction", required=False, nullable=True + ) + metalr_score = NullableItemListField( + items_types=(float,), help_text="MetalR score", required=False, nullable=True + ) + metalr_rankscore = NullableItemListField( + items_types=(float,), + help_text="MetalR rankscore", + required=False, + nullable=True, + ) + provean_pred = NullableItemListField( + items_types=(str,), help_text="Provean prediction", required=False + ) + provean_score = NullableItemListField( + items_types=(float,), help_text="Provean score", required=False + ) + provean_converted_rankscore = NullableItemListField( + items_types=(float,), + help_text="Provean converted rankscore", + required=False, + nullable=True, + ) + lrt_pred = NullableItemListField( + items_types=(str,), help_text="LRT prediction", required=False, nullable=True + ) + lrt_score = NullableItemListField( + items_types=(float,), help_text="LRT score", required=False, nullable=True + ) + lrt_converted_rankscore = NullableItemListField( + items_types=(float,), help_text="LRT converted rankscore", required=False + ) + lrt_omega = NullableItemListField( + items_types=(float,), help_text="LRT Omega", required=False, nullable=True + ) + cadd_raw = NullableItemListField( + items_types=(float,), help_text="CADD raw score", required=False, nullable=True + ) + cadd_raw_rankscore = NullableItemListField( + items_types=(float,), help_text="CADD raw rankscore", required=False + ) + cadd_phred = NullableItemListField( + items_types=(float,), help_text="CADD phred", required=False, nullable=True + ) + gm12878_confidence_value = NullableItemListField( + items_types=(float,), + help_text="GM12878 fitCons confidence value", + required=False, + nullable=True, + ) + gm12878_fitcons_score = NullableItemListField( + items_types=(float,), help_text="GM12878 fitCons score", required=False + ) + gm12878_fitcons_score_rankscore = NullableItemListField( + items_types=(float,), + help_text="GM12878 fitCons rankscore", + required=False, + nullable=True, + ) + siphy_29way_logodds_rankscore = NullableItemListField( + items_types=(float,), + help_text="SiPhy29way logOdds rankscore", + required=False, + nullable=True, + ) + siphy_29way_pi = NullableItemListField( + items_types=(float,), help_text="SiPhy29way pi", required=False, nullable=True + ) + phylop20way_mammalian = NullableItemListField( + items_types=(float,), help_text="phyloP20way mammalian", required=False + ) + phylop20way_mammalian_rankscore = NullableItemListField( + items_types=(float,), + help_text="phyloP20way mammalian rankscore", + required=False, + nullable=True, + ) + phylop100way_vertebrate_rankscore = NullableItemListField( + items_types=(float,), + help_text="phyloP100way vertebrate rankscore", + required=False, + nullable=True, + ) + phastcons20way_mammalian = NullableItemListField( + items_types=(float,), help_text="phastCons20way mammalian", required=False + ) + phastcons20way_mammalian_rankscore = NullableItemListField( + items_types=(float,), + help_text="phastCons20way mammalian rankscore", + required=False, + nullable=True, + ) + phastcons100way_vertebrate = NullableItemListField( + items_types=(float,), + help_text="phastCons100way vertebrate", + required=False, + nullable=True, + ) + phastcons100way_vertebrate_rankscore = NullableItemListField( + items_types=(float,), + help_text="phastCons100way vertebrate rankscore", + required=False, + nullable=True, + ) + vest3_score = NullableItemListField( + items_types=(float,), help_text="VEST3 score", required=False + ) + vest3_rankscore = NullableItemListField( + items_types=(float,), help_text="VEST3 rankscore", required=False, nullable=True + ) class DBscSNV(models.Base): version = fields.StringField(help_text="Version") - ada_score = NullableItemListField(items_types=(float, ), help_text='ADA Score', required=False, nullable=True) - rf_score = NullableItemListField(items_types=(float,), help_text='RF Score', required=False, nullable=True) + ada_score = NullableItemListField( + items_types=(float,), help_text="ADA Score", required=False, nullable=True + ) + rf_score = NullableItemListField( + items_types=(float,), help_text="RF Score", required=False, nullable=True + ) diff --git a/varsome_api/models/elements/gerp.py b/varsome_api/models/elements/gerp.py index 06b34b3..5496569 100755 --- a/varsome_api/models/elements/gerp.py +++ b/varsome_api/models/elements/gerp.py @@ -14,10 +14,18 @@ from jsonmodels import models, fields -__author__ = "ckopanos" - class Gerp(models.Base): version = fields.StringField(help_text="Version") - gerp_nr = fields.ListField(items_types=(float, type(None)), required=False, help_text="GERP NR", nullable=True) - gerp_rs = fields.ListField(items_types=(float, type(None)), required=False, help_text="GERP RS", nullable=True) + gerp_nr = fields.ListField( + items_types=(float, type(None)), + required=False, + help_text="GERP NR", + nullable=True, + ) + gerp_rs = fields.ListField( + items_types=(float, type(None)), + required=False, + help_text="GERP RS", + nullable=True, + ) diff --git a/varsome_api/models/elements/gnomad.py b/varsome_api/models/elements/gnomad.py index 4baf52d..9bdfc94 100755 --- a/varsome_api/models/elements/gnomad.py +++ b/varsome_api/models/elements/gnomad.py @@ -14,18 +14,27 @@ from jsonmodels import models, fields -__author__ = "ckopanos" - class GnomADCoverage(models.Base): version = fields.StringField(help_text="Version") - coverage_mean = fields.ListField(help_text="Mean coverage", - items_types=(float, type(None)), required=False, nullable=True) - coverage_median = fields.ListField(help_text="Median coverage", - items_types=(float, type(None)), required=False, nullable=True) - coverage_20_frequency = fields.ListField(help_text="Proportion of samples with over 20x", - items_types=(float, type(None)), - required=False, nullable=True) + coverage_mean = fields.ListField( + help_text="Mean coverage", + items_types=(float, type(None)), + required=False, + nullable=True, + ) + coverage_median = fields.ListField( + help_text="Median coverage", + items_types=(float, type(None)), + required=False, + nullable=True, + ) + coverage_20_frequency = fields.ListField( + help_text="Proportion of samples with over 20x", + items_types=(float, type(None)), + required=False, + nullable=True, + ) class GnomAD(models.Base): @@ -35,44 +44,134 @@ class GnomAD(models.Base): ac_adj = fields.FloatField(help_text="Allele Count", required=False, nullable=True) an_adj = fields.FloatField(help_text="Allele Number", required=False, nullable=True) af = fields.FloatField(help_text="Allele Frequency", required=False, nullable=True) - ac_afr = fields.IntField(help_text="Allele Count African", required=False, nullable=True) - ac_amr = fields.IntField(help_text="Allele Count American", required=False, nullable=True) - ac_asj = fields.IntField(help_text="Allele Count Ashkenazi Jewish", required=False, nullable=True) - ac_eas = fields.IntField(help_text="Allele Count East Asian", required=False, nullable=True) - ac_fin = fields.IntField(help_text="Allele Count European (Finnish)", required=False, nullable=True) - ac_nfe = fields.IntField(help_text="Allele Count European (Non-Finnish)", required=False, nullable=True) - ac_oth = fields.IntField(help_text="Allele Count Other", required=False, nullable=True) - ac_male = fields.IntField(help_text="Allele Count Male", required=False, nullable=True) - ac_female = fields.IntField(help_text="Allele Count Female", required=False, nullable=True) - hom = fields.IntField(help_text="Number of Homozygotes", required=False, nullable=True) - hemi = fields.IntField(help_text="Number of Hemizygotes", required=False, nullable=True) - ac_hom = fields.FloatField(help_text="Number of Homozygotes", required=False, nullable=True) - ac_hemi = fields.FloatField(help_text="Number of Hemizygotes", required=False, nullable=True) - an_afr = fields.IntField(help_text="Allele Number African", required=False, nullable=True) - an_amr = fields.IntField(help_text="Allele Number American", required=False, nullable=True) - an_asj = fields.IntField(help_text="Allele Number Ashkenazi Jewish", required=False, nullable=True) - an_eas = fields.IntField(help_text="Allele Number East Asian", required=False, nullable=True) - an_fin = fields.IntField(help_text="Allele Number European (Finnish)", required=False, nullable=True) - an_nfe = fields.IntField(help_text="Allele Number European (Non-Finnish)", required=False, nullable=True) - an_oth = fields.IntField(help_text="Allele Number Other", required=False, nullable=True) - an_male = fields.IntField(help_text="Allele Number Male", required=False, nullable=True) - an_female = fields.IntField(help_text="Allele Number Female", required=False, nullable=True) - hom_afr = fields.IntField(help_text="Number of Homozygotes African", required=False, nullable=True) - hom_amr = fields.IntField(help_text="Number of Homozygotes American", required=False, nullable=True) - hom_asj = fields.IntField(help_text="Number of Homozygotes Ashkenazi Jewish", required=False, nullable=True) - hom_eas = fields.IntField(help_text="Number of Homozygotes East Asian", required=False, nullable=True) - hom_fin = fields.IntField(help_text="Number of Homozygotes European (Finnish)", required=False, nullable=True) - hom_nfe = fields.IntField(help_text="Number of Homozygotes European (Non-Finnish)", required=False, nullable=True) - hom_oth = fields.IntField(help_text="Number of Homozygotes Other", required=False, nullable=True) - hom_male = fields.IntField(help_text="Number of Homozygotes Male", required=False, nullable=True) - hom_female = fields.IntField(help_text="Number of Homozygotes Female", required=False, nullable=True) - af_afr = fields.FloatField(help_text="Allele Frequency African", required=False, nullable=True) - af_amr = fields.FloatField(help_text="Allele Frequency American", required=False, nullable=True) - af_asj = fields.FloatField(help_text="Allele Frequency Ashkenazi Jewish", required=False, nullable=True) - af_eas = fields.FloatField(help_text="Allele Frequency East Asian", required=False, nullable=True) - af_fin = fields.FloatField(help_text="Allele Frequency European (Finnish)", required=False, nullable=True) - af_nfe = fields.FloatField(help_text="Allele Frequency European (Non-Finnish)", required=False, nullable=True) - af_oth = fields.FloatField(help_text="Allele Frequency Other", required=False, nullable=True) - af_male = fields.FloatField(help_text="Allele Frequency Male", required=False, nullable=True) - af_female = fields.FloatField(help_text="Allele Frequency Female", required=False, nullable=True) - main_data = fields.StringField(help_text="Main data point", required=False, nullable=True) + ac_afr = fields.IntField( + help_text="Allele Count African", required=False, nullable=True + ) + ac_amr = fields.IntField( + help_text="Allele Count American", required=False, nullable=True + ) + ac_asj = fields.IntField( + help_text="Allele Count Ashkenazi Jewish", required=False, nullable=True + ) + ac_eas = fields.IntField( + help_text="Allele Count East Asian", required=False, nullable=True + ) + ac_fin = fields.IntField( + help_text="Allele Count European (Finnish)", required=False, nullable=True + ) + ac_nfe = fields.IntField( + help_text="Allele Count European (Non-Finnish)", required=False, nullable=True + ) + ac_oth = fields.IntField( + help_text="Allele Count Other", required=False, nullable=True + ) + ac_male = fields.IntField( + help_text="Allele Count Male", required=False, nullable=True + ) + ac_female = fields.IntField( + help_text="Allele Count Female", required=False, nullable=True + ) + hom = fields.IntField( + help_text="Number of Homozygotes", required=False, nullable=True + ) + hemi = fields.IntField( + help_text="Number of Hemizygotes", required=False, nullable=True + ) + ac_hom = fields.FloatField( + help_text="Number of Homozygotes", required=False, nullable=True + ) + ac_hemi = fields.FloatField( + help_text="Number of Hemizygotes", required=False, nullable=True + ) + an_afr = fields.IntField( + help_text="Allele Number African", required=False, nullable=True + ) + an_amr = fields.IntField( + help_text="Allele Number American", required=False, nullable=True + ) + an_asj = fields.IntField( + help_text="Allele Number Ashkenazi Jewish", required=False, nullable=True + ) + an_eas = fields.IntField( + help_text="Allele Number East Asian", required=False, nullable=True + ) + an_fin = fields.IntField( + help_text="Allele Number European (Finnish)", required=False, nullable=True + ) + an_nfe = fields.IntField( + help_text="Allele Number European (Non-Finnish)", required=False, nullable=True + ) + an_oth = fields.IntField( + help_text="Allele Number Other", required=False, nullable=True + ) + an_male = fields.IntField( + help_text="Allele Number Male", required=False, nullable=True + ) + an_female = fields.IntField( + help_text="Allele Number Female", required=False, nullable=True + ) + hom_afr = fields.IntField( + help_text="Number of Homozygotes African", required=False, nullable=True + ) + hom_amr = fields.IntField( + help_text="Number of Homozygotes American", required=False, nullable=True + ) + hom_asj = fields.IntField( + help_text="Number of Homozygotes Ashkenazi Jewish", + required=False, + nullable=True, + ) + hom_eas = fields.IntField( + help_text="Number of Homozygotes East Asian", required=False, nullable=True + ) + hom_fin = fields.IntField( + help_text="Number of Homozygotes European (Finnish)", + required=False, + nullable=True, + ) + hom_nfe = fields.IntField( + help_text="Number of Homozygotes European (Non-Finnish)", + required=False, + nullable=True, + ) + hom_oth = fields.IntField( + help_text="Number of Homozygotes Other", required=False, nullable=True + ) + hom_male = fields.IntField( + help_text="Number of Homozygotes Male", required=False, nullable=True + ) + hom_female = fields.IntField( + help_text="Number of Homozygotes Female", required=False, nullable=True + ) + af_afr = fields.FloatField( + help_text="Allele Frequency African", required=False, nullable=True + ) + af_amr = fields.FloatField( + help_text="Allele Frequency American", required=False, nullable=True + ) + af_asj = fields.FloatField( + help_text="Allele Frequency Ashkenazi Jewish", required=False, nullable=True + ) + af_eas = fields.FloatField( + help_text="Allele Frequency East Asian", required=False, nullable=True + ) + af_fin = fields.FloatField( + help_text="Allele Frequency European (Finnish)", required=False, nullable=True + ) + af_nfe = fields.FloatField( + help_text="Allele Frequency European (Non-Finnish)", + required=False, + nullable=True, + ) + af_oth = fields.FloatField( + help_text="Allele Frequency Other", required=False, nullable=True + ) + af_male = fields.FloatField( + help_text="Allele Frequency Male", required=False, nullable=True + ) + af_female = fields.FloatField( + help_text="Allele Frequency Female", required=False, nullable=True + ) + main_data = fields.StringField( + help_text="Main data point", required=False, nullable=True + ) diff --git a/varsome_api/models/elements/gwas.py b/varsome_api/models/elements/gwas.py index f339728..2af7f2c 100755 --- a/varsome_api/models/elements/gwas.py +++ b/varsome_api/models/elements/gwas.py @@ -14,29 +14,49 @@ from jsonmodels import models, fields -__author__ = "ckopanos" - class GWASDetails(models.Base): - gwas_symbol = fields.StringField(help_text="GWAS symbol", required=False, nullable=True) + gwas_symbol = fields.StringField( + help_text="GWAS symbol", required=False, nullable=True + ) date = fields.StringField(help_text="Date", required=False, nullable=True) study = fields.StringField(help_text="Study", required=False, nullable=True) - disease_or_trait = fields.StringField(help_text="Disease or trait", required=False, nullable=True) - mapped_traits = fields.ListField(items_types=(str,), help_text="Mapped trait", required=False, nullable=True) - mapped_trait_urls = fields.ListField(items_types=(str,), help_text="Mapped trait URL", required=False, - nullable=True) - strongest_snp_risk_allele = fields.StringField(help_text="Strongest SNP risk allele", required=False, nullable=True) - odds_ratio = fields.FloatField(help_text="Odds ratio", required=False, nullable=True) + disease_or_trait = fields.StringField( + help_text="Disease or trait", required=False, nullable=True + ) + mapped_traits = fields.ListField( + items_types=(str,), help_text="Mapped trait", required=False, nullable=True + ) + mapped_trait_urls = fields.ListField( + items_types=(str,), help_text="Mapped trait URL", required=False, nullable=True + ) + strongest_snp_risk_allele = fields.StringField( + help_text="Strongest SNP risk allele", required=False, nullable=True + ) + odds_ratio = fields.FloatField( + help_text="Odds ratio", required=False, nullable=True + ) p_value = fields.StringField(help_text="p value", required=False, nullable=True) - confidence_range_95_low = fields.FloatField(help_text="Confidence range 95% low", required=False, nullable=True) - confidence_range_95_high = fields.FloatField(help_text="Confidence range 95% high", required=False, nullable=True) - confidence_comment = fields.StringField(help_text="Confidence comment", required=False, nullable=True) - initial_sample_size = fields.StringField(help_text="Initial sample size", required=False, nullable=True) - replication_sample_size = fields.StringField(help_text="Replication sample size", required=False, nullable=True) - pub_med_references = fields.ListField(items_types=(int,), help_text="PubMed References", required=False, - nullable=True) + confidence_range_95_low = fields.FloatField( + help_text="Confidence range 95% low", required=False, nullable=True + ) + confidence_range_95_high = fields.FloatField( + help_text="Confidence range 95% high", required=False, nullable=True + ) + confidence_comment = fields.StringField( + help_text="Confidence comment", required=False, nullable=True + ) + initial_sample_size = fields.StringField( + help_text="Initial sample size", required=False, nullable=True + ) + replication_sample_size = fields.StringField( + help_text="Replication sample size", required=False, nullable=True + ) + pub_med_references = fields.ListField( + items_types=(int,), help_text="PubMed References", required=False, nullable=True + ) class GWAS(models.Base): version = fields.StringField(help_text="Version") - items = fields.ListField(help_text='Details', items_types=(GWASDetails,)) + items = fields.ListField(help_text="Details", items_types=(GWASDetails,)) diff --git a/varsome_api/models/elements/iarc.py b/varsome_api/models/elements/iarc.py index a3db37f..a849fd8 100755 --- a/varsome_api/models/elements/iarc.py +++ b/varsome_api/models/elements/iarc.py @@ -15,20 +15,32 @@ from jsonmodels import models, fields -__author__ = "ckopanos" - class TP53GermlineDetails(models.Base): - age_at_diagnosis = fields.IntField(help_text="Age at diagnosis", required=False, nullable=True) + age_at_diagnosis = fields.IntField( + help_text="Age at diagnosis", required=False, nullable=True + ) country = fields.StringField(help_text="Country", required=False, nullable=True) effect = fields.StringField(help_text="Effect", required=False, nullable=True) - familycase = fields.StringField(help_text="Family case", required=False, nullable=True) - familycase_group = fields.StringField(help_text="Family case group", required=False, nullable=True) - family_code = fields.StringField(help_text="Family code", required=False, nullable=True) - generation = fields.StringField(help_text="Generation", required=False, nullable=True) - morphology = fields.StringField(help_text="Morphology", required=False, nullable=True) + familycase = fields.StringField( + help_text="Family case", required=False, nullable=True + ) + familycase_group = fields.StringField( + help_text="Family case group", required=False, nullable=True + ) + family_code = fields.StringField( + help_text="Family code", required=False, nullable=True + ) + generation = fields.StringField( + help_text="Generation", required=False, nullable=True + ) + morphology = fields.StringField( + help_text="Morphology", required=False, nullable=True + ) sex = fields.StringField(help_text="Sex", required=False, nullable=True) - topography = fields.StringField(help_text="Topography", required=False, nullable=True) + topography = fields.StringField( + help_text="Topography", required=False, nullable=True + ) unaffected = fields.IntField(help_text="Unaffected", required=False, nullable=True) @@ -36,21 +48,30 @@ class TP53SomaticDetails(models.Base): age = fields.IntField(help_text="Age", required=False, nullable=True) country = fields.StringField(help_text="Country", required=False, nullable=True) effect = fields.StringField(help_text="Effect", required=False, nullable=True) - morphology = fields.StringField(help_text="Morphology", required=False, nullable=True) + morphology = fields.StringField( + help_text="Morphology", required=False, nullable=True + ) mut_rate = fields.IntField(help_text="Mutation rate", required=False, nullable=True) - pub_med_references = fields.ListField(items_types=(int,), help_text="PubMed References", required=False, - nullable=True) - sample_source = fields.StringField(help_text="Sample source", required=False, nullable=True) + pub_med_references = fields.ListField( + items_types=(int,), help_text="PubMed References", required=False, nullable=True + ) + sample_source = fields.StringField( + help_text="Sample source", required=False, nullable=True + ) stage = fields.StringField(help_text="Stage", required=False, nullable=True) - structural_motif = fields.StringField(help_text="Structural Motif", required=False, nullable=True) - topography = fields.StringField(help_text="Topography", required=False, nullable=True) + structural_motif = fields.StringField( + help_text="Structural Motif", required=False, nullable=True + ) + topography = fields.StringField( + help_text="Topography", required=False, nullable=True + ) class TP53Germline(models.Base): version = fields.StringField(help_text="Version") - items = fields.ListField(help_text='Details', items_types=(TP53GermlineDetails,)) + items = fields.ListField(help_text="Details", items_types=(TP53GermlineDetails,)) class TP53Somatic(models.Base): version = fields.StringField(help_text="Version") - items = fields.ListField(help_text='Details', items_types=(TP53SomaticDetails,)) + items = fields.ListField(help_text="Details", items_types=(TP53SomaticDetails,)) diff --git a/varsome_api/models/elements/icgc.py b/varsome_api/models/elements/icgc.py index 8cfbaa0..c58ac02 100755 --- a/varsome_api/models/elements/icgc.py +++ b/varsome_api/models/elements/icgc.py @@ -14,11 +14,11 @@ from jsonmodels import models, fields -__author__ = "ckopanos" - class Occurrence(models.Base): - affected = fields.IntField(required=False, nullable=True, help_text="Affected number") + affected = fields.IntField( + required=False, nullable=True, help_text="Affected number" + ) donors = fields.IntField(required=False, nullable=True, help_text="Donors number") project = fields.StringField(required=False, nullable=True, help_text="Project") @@ -26,7 +26,15 @@ class Occurrence(models.Base): class Somatic(models.Base): version = fields.StringField(help_text="Version") id = fields.StringField(help_text="ID", required=False, nullable=True) - occurrence = fields.ListField(required=False, nullable=True, items_types=(Occurrence, ), help_text="Occurrence") - affected_donors = fields.IntField(help_text="Affected Donors", required=False, nullable=True) - project_count = fields.IntField(help_text="Project Count", required=False, nullable=True) - main_data = fields.StringField(help_text="Main data point", required=False, nullable=True) \ No newline at end of file + occurrence = fields.ListField( + required=False, nullable=True, items_types=(Occurrence,), help_text="Occurrence" + ) + affected_donors = fields.IntField( + help_text="Affected Donors", required=False, nullable=True + ) + project_count = fields.IntField( + help_text="Project Count", required=False, nullable=True + ) + main_data = fields.StringField( + help_text="Main data point", required=False, nullable=True + ) diff --git a/varsome_api/models/elements/isb.py b/varsome_api/models/elements/isb.py index 33897c8..a128fa3 100755 --- a/varsome_api/models/elements/isb.py +++ b/varsome_api/models/elements/isb.py @@ -14,11 +14,13 @@ from jsonmodels import models, fields -__author__ = "ckopanos" - class Kaviar3(models.Base): version = fields.StringField(help_text="Version") - ac = fields.ListField(items_types=(int, ), help_text="ac", required=False, nullable=True) - an = fields.ListField(items_types=(int,), help_text="an", required=False, nullable=True) - main_data = fields.StringField(help_text="Main data point", required=False) \ No newline at end of file + ac = fields.ListField( + items_types=(int,), help_text="ac", required=False, nullable=True + ) + an = fields.ListField( + items_types=(int,), help_text="an", required=False, nullable=True + ) + main_data = fields.StringField(help_text="Main data point", required=False) diff --git a/varsome_api/models/elements/ncbi.py b/varsome_api/models/elements/ncbi.py index 7b758f8..8382232 100755 --- a/varsome_api/models/elements/ncbi.py +++ b/varsome_api/models/elements/ncbi.py @@ -16,8 +16,6 @@ from varsome_api.models.fields import DictField -__author__ = "ckopanos" - class DbSNP(models.Base): version = fields.StringField(help_text="Version") @@ -26,15 +24,36 @@ class DbSNP(models.Base): class ClinVar2(models.Base): version = fields.StringField(help_text="Version") - review_status = fields.StringField(help_text="Review status", required=False, nullable=True) - review_stars = fields.IntField(help_text="Review stars", required=False, nullable=True) - variation_id = fields.IntField(help_text="Variation ID", required=False, nullable=True) - num_submitters = fields.IntField(help_text="Number of submitters", required=False, nullable=True) - pub_med_references = fields.ListField(items_types=(int,), help_text="PubMed references", required=False, - nullable=True) - clinical_significance = fields.ListField(items_types=(str,), help_text="Clinical significance", required=False, - nullable=True) - last_evaluation = fields.StringField(help_text="Last evaluation", required=False, nullable=True) - origin = fields.ListField(items_types=(str,), help_text="Origin", required=False, nullable=True) - accessions = fields.ListField(items_types=(dict,), help_text="Accessions", required=False, nullable=True) - main_data = fields.StringField(help_text="Main data point", required=False, nullable=True) + review_status = fields.StringField( + help_text="Review status", required=False, nullable=True + ) + review_stars = fields.IntField( + help_text="Review stars", required=False, nullable=True + ) + variation_id = fields.IntField( + help_text="Variation ID", required=False, nullable=True + ) + num_submitters = fields.IntField( + help_text="Number of submitters", required=False, nullable=True + ) + pub_med_references = fields.ListField( + items_types=(int,), help_text="PubMed references", required=False, nullable=True + ) + clinical_significance = fields.ListField( + items_types=(str,), + help_text="Clinical significance", + required=False, + nullable=True, + ) + last_evaluation = fields.StringField( + help_text="Last evaluation", required=False, nullable=True + ) + origin = fields.ListField( + items_types=(str,), help_text="Origin", required=False, nullable=True + ) + accessions = fields.ListField( + items_types=(dict,), help_text="Accessions", required=False, nullable=True + ) + main_data = fields.StringField( + help_text="Main data point", required=False, nullable=True + ) diff --git a/varsome_api/models/elements/sanger.py b/varsome_api/models/elements/sanger.py index 6268da7..75abad2 100755 --- a/varsome_api/models/elements/sanger.py +++ b/varsome_api/models/elements/sanger.py @@ -14,84 +14,200 @@ from jsonmodels import models, fields -__author__ = "ckopanos" - class Cosmic(models.Base): version = fields.StringField(help_text="Version") - primary_site = fields.ListField(items_types=(str,), help_text="Primary site", required=False, nullable=True) - pub_med_references = fields.ListField(items_types=(int,), help_text="PUBMED References", required=False, - nullable=True) + primary_site = fields.ListField( + items_types=(str,), help_text="Primary site", required=False, nullable=True + ) + pub_med_references = fields.ListField( + items_types=(int,), help_text="PUBMED References", required=False, nullable=True + ) class CosmicLicensedDrugEntry(models.Base): drug_name = fields.StringField(help_text="Drug name", required=False, nullable=True) - somatic_status = fields.StringField(help_text="Somatic status", required=False, nullable=True) + somatic_status = fields.StringField( + help_text="Somatic status", required=False, nullable=True + ) zygosity = fields.StringField(help_text="Zygosity", required=False, nullable=True) gene = fields.StringField(help_text="Gene", required=False, nullable=True) - transcript = fields.StringField(help_text="Transcript", required=False, nullable=True) - census_gene = fields.StringField(help_text="Census gene", required=False, nullable=True) - pub_med_references = fields.ListField(items_types=(int,), help_text="PUBMED References", required=False, - nullable=True) - histology_freq = fields.ListField(items_types=(float,), help_text="Histology frequency", required=False, - nullable=True) - tissue_freq = fields.ListField(items_types=(float,), help_text="Tissue frequency", required=False, nullable=True) + transcript = fields.StringField( + help_text="Transcript", required=False, nullable=True + ) + census_gene = fields.StringField( + help_text="Census gene", required=False, nullable=True + ) + pub_med_references = fields.ListField( + items_types=(int,), help_text="PUBMED References", required=False, nullable=True + ) + histology_freq = fields.ListField( + items_types=(float,), + help_text="Histology frequency", + required=False, + nullable=True, + ) + tissue_freq = fields.ListField( + items_types=(float,), + help_text="Tissue frequency", + required=False, + nullable=True, + ) class CosmicLicensedDetails(models.Base): - entry_type = fields.StringField(help_text="Entry type", required=False, nullable=True) - cosmic_id = fields.ListField(items_types=(str, dict,), help_text="Cosmic ID", required=False, nullable=True) - pub_med_references = fields.ListField(items_types=(int,), help_text="PUBMED References", required=False, - nullable=True) - histology_freq = fields.ListField(items_types=(int, str, float), help_text="Histology frequency", required=False, - nullable=True) - genome_wide_screen_freq = fields.ListField(items_types=(int, str, float), help_text="Histology frequency", required=False, - nullable=True) - loh_freq = fields.ListField(items_types=(int, str, float), help_text="LOH frequency", required=False, nullable=True) - age_freq = fields.ListField(items_types=(int, str, float), help_text="Age frequency", required=False, nullable=True) - zygosity_freq = fields.ListField(items_types=(int, str, float), help_text="Zygosity frequency", required=False, - nullable=True) - tumour_origin_freq = fields.ListField(items_types=(int, str, float,), help_text="Tumour original frequency", required=False, - nullable=True) - somatic_status_freq = fields.ListField(items_types=(int, str, float), help_text="Somatic status frequency", required=False, - nullable=True) - primary_site_freq = fields.ListField(items_types=(int, str, float), help_text="Primary site frequency", required=False, nullable=True) - description = fields.ListField(items_types=(str,), help_text="Description", required=False, nullable=True) - accession_number = fields.ListField(items_types=(str,), help_text="Accession number", required=False, nullable=True) - fathmm_prediction = fields.StringField(help_text="FATHMM prediction", required=False, nullable=True) - fathmm_score = fields.FloatField(help_text="FATHMM score", required=False, nullable=True) - num_entries = fields.IntField(help_text="Number of entries", required=False, nullable=True) - num_samples = fields.IntField(help_text="Number of samples", required=False, nullable=True) - gene = fields.ListField(items_types=(str,), help_text="Gene", required=False, nullable=True) - - fathmm_mkl_coding_score = fields.FloatField(help_text="FATHMM_MKL coding score", required=False, nullable=True) - fathmm_mkl_coding_groups = fields.StringField(help_text="FATHMM_MKL coding groups", required=False, nullable=True) - fathmm_mkl_non_coding_score = fields.FloatField(help_text="FATHMM_MKL non coding score", required=False, - nullable=True) - fathmm_mkl_non_coding_groups = fields.StringField(help_text="FATHMM_MKL non coding groups", required=False, - nullable=True) - whole_exome_freq = fields.ListField(items_types=(str, int, float,), help_text="Whole exome frequency", required=False, - nullable=True) - whole_genome_reseq_freq = fields.ListField(items_types=(str, int, float,), help_text="Whole genome reseq frequency", - required=False, nullable=True) - - resistance_mutation = fields.ListField(items_types=(str,), help_text="Resistance mutation", required=False, - nullable=True) - drug_entries = fields.ListField(items_types=(CosmicLicensedDrugEntry,), help_text="Drug entries", required=False, - nullable=True) + entry_type = fields.StringField( + help_text="Entry type", required=False, nullable=True + ) + cosmic_id = fields.ListField( + items_types=( + str, + dict, + ), + help_text="Cosmic ID", + required=False, + nullable=True, + ) + pub_med_references = fields.ListField( + items_types=(int,), help_text="PUBMED References", required=False, nullable=True + ) + histology_freq = fields.ListField( + items_types=(int, str, float), + help_text="Histology frequency", + required=False, + nullable=True, + ) + genome_wide_screen_freq = fields.ListField( + items_types=(int, str, float), + help_text="Histology frequency", + required=False, + nullable=True, + ) + loh_freq = fields.ListField( + items_types=(int, str, float), + help_text="LOH frequency", + required=False, + nullable=True, + ) + age_freq = fields.ListField( + items_types=(int, str, float), + help_text="Age frequency", + required=False, + nullable=True, + ) + zygosity_freq = fields.ListField( + items_types=(int, str, float), + help_text="Zygosity frequency", + required=False, + nullable=True, + ) + tumour_origin_freq = fields.ListField( + items_types=( + int, + str, + float, + ), + help_text="Tumour original frequency", + required=False, + nullable=True, + ) + somatic_status_freq = fields.ListField( + items_types=(int, str, float), + help_text="Somatic status frequency", + required=False, + nullable=True, + ) + primary_site_freq = fields.ListField( + items_types=(int, str, float), + help_text="Primary site frequency", + required=False, + nullable=True, + ) + description = fields.ListField( + items_types=(str,), help_text="Description", required=False, nullable=True + ) + accession_number = fields.ListField( + items_types=(str,), help_text="Accession number", required=False, nullable=True + ) + fathmm_prediction = fields.StringField( + help_text="FATHMM prediction", required=False, nullable=True + ) + fathmm_score = fields.FloatField( + help_text="FATHMM score", required=False, nullable=True + ) + num_entries = fields.IntField( + help_text="Number of entries", required=False, nullable=True + ) + num_samples = fields.IntField( + help_text="Number of samples", required=False, nullable=True + ) + gene = fields.ListField( + items_types=(str,), help_text="Gene", required=False, nullable=True + ) + + fathmm_mkl_coding_score = fields.FloatField( + help_text="FATHMM_MKL coding score", required=False, nullable=True + ) + fathmm_mkl_coding_groups = fields.StringField( + help_text="FATHMM_MKL coding groups", required=False, nullable=True + ) + fathmm_mkl_non_coding_score = fields.FloatField( + help_text="FATHMM_MKL non coding score", required=False, nullable=True + ) + fathmm_mkl_non_coding_groups = fields.StringField( + help_text="FATHMM_MKL non coding groups", required=False, nullable=True + ) + whole_exome_freq = fields.ListField( + items_types=( + str, + int, + float, + ), + help_text="Whole exome frequency", + required=False, + nullable=True, + ) + whole_genome_reseq_freq = fields.ListField( + items_types=( + str, + int, + float, + ), + help_text="Whole genome reseq frequency", + required=False, + nullable=True, + ) + + resistance_mutation = fields.ListField( + items_types=(str,), + help_text="Resistance mutation", + required=False, + nullable=True, + ) + drug_entries = fields.ListField( + items_types=(CosmicLicensedDrugEntry,), + help_text="Drug entries", + required=False, + nullable=True, + ) class ComsicPublicDetails(models.Base): - num_samples = fields.IntField(help_text='Number of samples') - id = fields.StringField(help_text='Cosmic ID') - is_consistent = fields.BoolField(help_text='Cosmic ID is consistent across databases') - - + num_samples = fields.IntField(help_text="Number of samples") + id = fields.StringField(help_text="Cosmic ID") + is_consistent = fields.BoolField( + help_text="Cosmic ID is consistent across databases" + ) class CosmicPublic(models.Base): version = fields.StringField(help_text="Version") - items = fields.ListField(items_types=(ComsicPublicDetails, ), help_text="Details", required=False, nullable=True) + items = fields.ListField( + items_types=(ComsicPublicDetails,), + help_text="Details", + required=False, + nullable=True, + ) class CosmicLicensed(models.Base): diff --git a/varsome_api/models/elements/thousand_genomes.py b/varsome_api/models/elements/thousand_genomes.py index 20e79cf..30629cb 100755 --- a/varsome_api/models/elements/thousand_genomes.py +++ b/varsome_api/models/elements/thousand_genomes.py @@ -14,22 +14,54 @@ from jsonmodels import models, fields -__author__ = "ckopanos" - class ThousandGenomes(models.Base): version = fields.StringField(help_text="Version") - ac = fields.ListField(items_types=(int,), help_text="Allele Count", required=False, nullable=True) - af = fields.ListField(items_types=(float,), help_text="Allele Frequency", required=False, nullable=True) - an = fields.ListField(items_types=(int,), help_text="Allele Number", required=False, nullable=True) - ns = fields.ListField(items_types=(int,), help_text="Number of Samples", required=False, nullable=True) - afr_af = fields.ListField(items_types=(float,), help_text="Allele Frequency African", required=False, nullable=True) - amr_af = fields.ListField(items_types=(float,), help_text="Allele Frequency American", required=False, - nullable=True) - eas_af = fields.ListField(items_types=(float,), help_text="Allele Frequency East Asian", required=False, - nullable=True) - eur_af = fields.ListField(items_types=(float,), help_text="Allele Frequency European", required=False, - nullable=True) - sas_af = fields.ListField(items_types=(float,), help_text="Allele Frequency South Asian", required=False, - nullable=True) - main_data = fields.StringField(help_text="Main data point", required=False, nullable=True) + ac = fields.ListField( + items_types=(int,), help_text="Allele Count", required=False, nullable=True + ) + af = fields.ListField( + items_types=(float,), + help_text="Allele Frequency", + required=False, + nullable=True, + ) + an = fields.ListField( + items_types=(int,), help_text="Allele Number", required=False, nullable=True + ) + ns = fields.ListField( + items_types=(int,), help_text="Number of Samples", required=False, nullable=True + ) + afr_af = fields.ListField( + items_types=(float,), + help_text="Allele Frequency African", + required=False, + nullable=True, + ) + amr_af = fields.ListField( + items_types=(float,), + help_text="Allele Frequency American", + required=False, + nullable=True, + ) + eas_af = fields.ListField( + items_types=(float,), + help_text="Allele Frequency East Asian", + required=False, + nullable=True, + ) + eur_af = fields.ListField( + items_types=(float,), + help_text="Allele Frequency European", + required=False, + nullable=True, + ) + sas_af = fields.ListField( + items_types=(float,), + help_text="Allele Frequency South Asian", + required=False, + nullable=True, + ) + main_data = fields.StringField( + help_text="Main data point", required=False, nullable=True + ) diff --git a/varsome_api/models/elements/transcript.py b/varsome_api/models/elements/transcript.py index 0b9b2af..1194703 100755 --- a/varsome_api/models/elements/transcript.py +++ b/varsome_api/models/elements/transcript.py @@ -15,23 +15,35 @@ from jsonmodels import models, fields -__author__ = "ckopanos" - class TranscriptItem(models.Base): name = fields.StringField(help_text="Transcript") - coding_impact = fields.StringField(help_text="Coding impact", required=False, nullable=True) - function = fields.ListField(help_text="Function", items_types=(str,), required=False, nullable=True) - hgvs = fields.StringField(required=False, help_text="HGVS cDNA level", nullable=True) + coding_impact = fields.StringField( + help_text="Coding impact", required=False, nullable=True + ) + function = fields.ListField( + help_text="Function", items_types=(str,), required=False, nullable=True + ) + hgvs = fields.StringField( + required=False, help_text="HGVS cDNA level", nullable=True + ) hgvs_p1 = fields.StringField(required=False, nullable=True) hgvs_p3 = fields.StringField(required=False, nullable=True) - hgvs_notation = fields.StringField(help_text="HGVS notation", required=False, nullable=True) + hgvs_notation = fields.StringField( + help_text="HGVS notation", required=False, nullable=True + ) location = fields.StringField(help_text="Location", required=False, nullable=True) - coding_location = fields.StringField(help_text="Coding location", required=False, nullable=True) + coding_location = fields.StringField( + help_text="Coding location", required=False, nullable=True + ) canonical = fields.BoolField(help_text="Canonical", required=False, nullable=True) - gene_symbol = fields.StringField(help_text="Gene symbol", required=False, nullable=True) + gene_symbol = fields.StringField( + help_text="Gene symbol", required=False, nullable=True + ) class Transcript(models.Base): - items = fields.ListField(help_text='Transcripts', items_types=(TranscriptItem,), required=False) + items = fields.ListField( + help_text="Transcripts", items_types=(TranscriptItem,), required=False + ) version = fields.StringField(help_text="Version") diff --git a/varsome_api/models/elements/uniprot.py b/varsome_api/models/elements/uniprot.py index 30855da..0447526 100755 --- a/varsome_api/models/elements/uniprot.py +++ b/varsome_api/models/elements/uniprot.py @@ -15,22 +15,36 @@ from jsonmodels import models, fields -__author__ = "ckopanos" - class UniprotVariantsDetails(models.Base): - annotation_id = fields.StringField(help_text="Annotation ID", required=False, nullable=True) - protein_id = fields.StringField(help_text="Protein ID", required=False, nullable=True) - bed_comments = fields.ListField(help_text="Comments", items_types=(str, ), required=False, nullable=True) + annotation_id = fields.StringField( + help_text="Annotation ID", required=False, nullable=True + ) + protein_id = fields.StringField( + help_text="Protein ID", required=False, nullable=True + ) + bed_comments = fields.ListField( + help_text="Comments", items_types=(str,), required=False, nullable=True + ) gene = fields.StringField(help_text="Gene", required=False, nullable=True) - variant_type = fields.StringField(help_text="Variant type", required=False, nullable=True) - transcripts = fields.ListField(help_text="Transcripts", items_types=(str,), required=False, nullable=True) - pub_med_references = fields.ListField(help_text="PubMed References", items_types=(int,), required=False, nullable=True) + variant_type = fields.StringField( + help_text="Variant type", required=False, nullable=True + ) + transcripts = fields.ListField( + help_text="Transcripts", items_types=(str,), required=False, nullable=True + ) + pub_med_references = fields.ListField( + help_text="PubMed References", items_types=(int,), required=False, nullable=True + ) disease = fields.StringField(help_text="Disease", required=False, nullable=True) - disease_symbol = fields.StringField(help_text="Disease symbol", required=False, nullable=True) - disease_alt_symbol = fields.StringField(help_text="Disease alt symbol", required=False, nullable=True) + disease_symbol = fields.StringField( + help_text="Disease symbol", required=False, nullable=True + ) + disease_alt_symbol = fields.StringField( + help_text="Disease alt symbol", required=False, nullable=True + ) class UniprotVariants(models.Base): version = fields.StringField(help_text="Version") - items = fields.ListField(help_text='Details', items_types=(UniprotVariantsDetails,)) + items = fields.ListField(help_text="Details", items_types=(UniprotVariantsDetails,)) diff --git a/varsome_api/models/elements/wustl.py b/varsome_api/models/elements/wustl.py index e2633d7..343c03b 100755 --- a/varsome_api/models/elements/wustl.py +++ b/varsome_api/models/elements/wustl.py @@ -14,33 +14,61 @@ from jsonmodels import models, fields -__author__ = "ckopanos" - class CivicDetails(models.Base): variant = fields.StringField(help_text="Variant", required=False, nullable=True) - variant_summary = fields.StringField(help_text="Variant summary", required=False, nullable=True) - variant_civic_url = fields.StringField(help_text="Variant CIViC URL", required=False, nullable=True) - variant_origin = fields.StringField(help_text="Variant origin", required=False, nullable=True) - pub_med_references = fields.ListField(items_types=(int, ), help_text="PubMed References", required=False, nullable=True) - clinical_significance = fields.StringField(help_text="Clinical significance", required=False, nullable=True) - evidence_level = fields.StringField(help_text="Evidence level", required=False, nullable=True) - evidence_statement = fields.StringField(help_text="Evidence statement", required=False, nullable=True) - evidence_type = fields.StringField(help_text="Evidence type", required=False, nullable=True) - evidence_status = fields.StringField(help_text="Evidence status", required=False, nullable=True) - evidence_direction = fields.StringField(help_text="Evidence direction", required=False, nullable=True) - evidence_civic_url = fields.StringField(help_text="Evidence CIViC URL", required=False, nullable=True) - drugs = fields.ListField(items_types=(str, ), help_text="Drugs", required=False, nullable=True) - transcripts = fields.ListField(items_types=(str, ), help_text="Transcripts", required=False, nullable=True) - representative_transcript = fields.StringField(help_text="Representative transcript", required=False, nullable=True) + variant_summary = fields.StringField( + help_text="Variant summary", required=False, nullable=True + ) + variant_civic_url = fields.StringField( + help_text="Variant CIViC URL", required=False, nullable=True + ) + variant_origin = fields.StringField( + help_text="Variant origin", required=False, nullable=True + ) + pub_med_references = fields.ListField( + items_types=(int,), help_text="PubMed References", required=False, nullable=True + ) + clinical_significance = fields.StringField( + help_text="Clinical significance", required=False, nullable=True + ) + evidence_level = fields.StringField( + help_text="Evidence level", required=False, nullable=True + ) + evidence_statement = fields.StringField( + help_text="Evidence statement", required=False, nullable=True + ) + evidence_type = fields.StringField( + help_text="Evidence type", required=False, nullable=True + ) + evidence_status = fields.StringField( + help_text="Evidence status", required=False, nullable=True + ) + evidence_direction = fields.StringField( + help_text="Evidence direction", required=False, nullable=True + ) + evidence_civic_url = fields.StringField( + help_text="Evidence CIViC URL", required=False, nullable=True + ) + drugs = fields.ListField( + items_types=(str,), help_text="Drugs", required=False, nullable=True + ) + transcripts = fields.ListField( + items_types=(str,), help_text="Transcripts", required=False, nullable=True + ) + representative_transcript = fields.StringField( + help_text="Representative transcript", required=False, nullable=True + ) disease = fields.StringField(help_text="Disease", required=False, nullable=True) rating = fields.StringField(help_text="Rating", required=False, nullable=True) gene = fields.StringField(help_text="Gene", required=False, nullable=True) - gene_civic_url = fields.StringField(help_text="Gene CIViC URL", required=False, nullable=True) + gene_civic_url = fields.StringField( + help_text="Gene CIViC URL", required=False, nullable=True + ) entrez_id = fields.StringField(help_text="Entrez ID", required=False, nullable=True) doid = fields.StringField(help_text="DOID", required=False, nullable=True) class Civic(models.Base): version = fields.StringField(help_text="Version") - items = fields.ListField(help_text='Details', items_types=(CivicDetails, )) \ No newline at end of file + items = fields.ListField(help_text="Details", items_types=(CivicDetails,)) diff --git a/varsome_api/models/fields.py b/varsome_api/models/fields.py index f0256f7..5680874 100755 --- a/varsome_api/models/fields.py +++ b/varsome_api/models/fields.py @@ -14,8 +14,6 @@ from jsonmodels import fields -__author__ = "ckopanos" - class DictField(fields.BaseField): @@ -42,7 +40,6 @@ def parse_value(self, values): if not isinstance(values, list): return values - return [self._cast_value(value) if value is not None else None for value in values] - - - + return [ + self._cast_value(value) if value is not None else None for value in values + ] diff --git a/varsome_api/models/variant.py b/varsome_api/models/variant.py index fbb02ee..8227e08 100755 --- a/varsome_api/models/variant.py +++ b/varsome_api/models/variant.py @@ -11,11 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from .fields import DictField from .elements import * -__author__ = "ckopanos" - class AnnotatedVariant(models.Base): """ @@ -28,35 +25,79 @@ class AnnotatedVariant(models.Base): alt = fields.StringField(help_text="ALT Sequence", required=False, nullable=True) ref = fields.StringField(help_text="REF Sequence", required=False, nullable=True) pos = fields.IntField(help_text="Position") - variant_id = fields.StringField(help_text='Variant Id') - refseq_transcripts = fields.ListField(required=False, items_types=(Transcript,), help_text="RefSeq Transcripts") - ensembl_transcripts = fields.ListField(required=False, items_types=(Transcript,), help_text="Ensembl Transcripts") + variant_id = fields.StringField(help_text="Variant Id") + refseq_transcripts = fields.ListField( + required=False, items_types=(Transcript,), help_text="RefSeq Transcripts" + ) + ensembl_transcripts = fields.ListField( + required=False, items_types=(Transcript,), help_text="Ensembl Transcripts" + ) broad_exac = fields.ListField(required=False, items_types=(ExAC,), help_text="ExAC") - gnomad_exomes = fields.ListField(required=False, items_types=(GnomAD,), help_text="gnomAD Exomes (ExAC)") - gnomad_exomes_coverage = fields.ListField(required=False, items_types=(GnomADCoverage,), - help_text="gnomAD exomes coverage") - gnomad_genomes = fields.ListField(required=False, items_types=(GnomAD,), help_text="gnomAD Genomes") - gnomad_genomes_coverage = fields.ListField(required=False, items_types=(GnomADCoverage,), - help_text="gnomAD genomes coverage") - thousand_genomes = fields.ListField(required=False, items_types=(ThousandGenomes,), help_text="1000 Genomes") + gnomad_exomes = fields.ListField( + required=False, items_types=(GnomAD,), help_text="gnomAD Exomes (ExAC)" + ) + gnomad_exomes_coverage = fields.ListField( + required=False, + items_types=(GnomADCoverage,), + help_text="gnomAD exomes coverage", + ) + gnomad_genomes = fields.ListField( + required=False, items_types=(GnomAD,), help_text="gnomAD Genomes" + ) + gnomad_genomes_coverage = fields.ListField( + required=False, + items_types=(GnomADCoverage,), + help_text="gnomAD genomes coverage", + ) + thousand_genomes = fields.ListField( + required=False, items_types=(ThousandGenomes,), help_text="1000 Genomes" + ) gerp = fields.ListField(required=False, items_types=(Gerp,), help_text="GERP") - isb_kaviar3 = fields.ListField(required=False, items_types=(Kaviar3,), help_text='ISB Kaviar3') + isb_kaviar3 = fields.ListField( + required=False, items_types=(Kaviar3,), help_text="ISB Kaviar3" + ) dbnsfp = fields.ListField(required=False, items_types=(DbNSFP,), help_text="dbNSFP") - dann_snvs = fields.ListField(required=False, items_types=(DannSNVs,), help_text="DANN score") - dbnsfp_dbscsnv = fields.ListField(required=False, items_types=(DBscSNV,), help_text='dbNSFP dbscSNV') - ncbi_dbsnp = fields.ListField(required=False, items_types=(DbSNP,), help_text="dbSNP") - sanger_cosmic = fields.ListField(required=False, items_types=(Cosmic,), help_text="Sanger Cosmic") - sanger_cosmic_public = fields.ListField(required=False, items_types=(CosmicPublic,), help_text="Cosmic") - sanger_cosmic_licensed = fields.ListField(required=False, items_types=(CosmicLicensed,), help_text="Cosmic") - ncbi_clinvar2 = fields.ListField(required=False, items_types=(ClinVar2,), help_text="ClinVar2") - icgc_somatic = fields.ListField(required=False, items_types=(Somatic,), help_text="ICGC Somatic") - iarc_tp53_germline = fields.ListField(required=False, items_types=(TP53Germline,), help_text="IARC TP53 Germline") - iarc_tp53_somatic = fields.ListField(required=False, items_types=(TP53Somatic,), help_text="IARC TP53 Somatic") + dann_snvs = fields.ListField( + required=False, items_types=(DannSNVs,), help_text="DANN score" + ) + dbnsfp_dbscsnv = fields.ListField( + required=False, items_types=(DBscSNV,), help_text="dbNSFP dbscSNV" + ) + ncbi_dbsnp = fields.ListField( + required=False, items_types=(DbSNP,), help_text="dbSNP" + ) + sanger_cosmic = fields.ListField( + required=False, items_types=(Cosmic,), help_text="Sanger Cosmic" + ) + sanger_cosmic_public = fields.ListField( + required=False, items_types=(CosmicPublic,), help_text="Cosmic" + ) + sanger_cosmic_licensed = fields.ListField( + required=False, items_types=(CosmicLicensed,), help_text="Cosmic" + ) + ncbi_clinvar2 = fields.ListField( + required=False, items_types=(ClinVar2,), help_text="ClinVar2" + ) + icgc_somatic = fields.ListField( + required=False, items_types=(Somatic,), help_text="ICGC Somatic" + ) + iarc_tp53_germline = fields.ListField( + required=False, items_types=(TP53Germline,), help_text="IARC TP53 Germline" + ) + iarc_tp53_somatic = fields.ListField( + required=False, items_types=(TP53Somatic,), help_text="IARC TP53 Somatic" + ) pub_med_articles = DictField(required=False, help_text="PUBMED Articles") - uniprot_variants = fields.ListField(required=False, items_types=(UniprotVariants,), help_text="UniProt variants") - wustl_civic = fields.ListField(required=False, items_types=(Civic,), help_text="CIViC") + uniprot_variants = fields.ListField( + required=False, items_types=(UniprotVariants,), help_text="UniProt variants" + ) + wustl_civic = fields.ListField( + required=False, items_types=(Civic,), help_text="CIViC" + ) gwas = fields.ListField(required=False, items_types=(GWAS,), help_text="GWAS") - acmg_annotation = fields.EmbeddedField(ACMG, required=False, nullable=True, help_text="ACMG Annotations") + acmg_annotation = fields.EmbeddedField( + ACMG, required=False, nullable=True, help_text="ACMG Annotations" + ) @property def genes(self): @@ -77,7 +118,9 @@ def refseq_genes(self): """ genes = [] for transcript in self.refseq_transcripts: - genes.extend([item.gene_symbol for item in transcript.items if item.gene_symbol]) + genes.extend( + [item.gene_symbol for item in transcript.items if item.gene_symbol] + ) return genes @property @@ -88,7 +131,9 @@ def ensembl_genes(self): """ genes = [] for transcript in self.ensembl_transcripts: - genes.extend([item.gene_symbol for item in transcript.items if item.gene_symbol]) + genes.extend( + [item.gene_symbol for item in transcript.items if item.gene_symbol] + ) return genes @property @@ -136,3 +181,13 @@ def gnomad_genomes_an(self): """ an = [gnomad_genomes.an for gnomad_genomes in self.gnomad_genomes] return an[0] if an else None + + @property + def acmg_verdict(self): + """ + :return: the acmg verdict for the variant + """ + acmg_annotation = self.acmg_annotation + if acmg_annotation is not None and acmg_annotation.verdict is not None: + return acmg_annotation.verdict.ACMG_rules.verdict + return None diff --git a/varsome_api/tests/__init__.py b/varsome_api/tests/__init__.py deleted file mode 100644 index da44d8e..0000000 --- a/varsome_api/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__author__ = "ckopanos" \ No newline at end of file diff --git a/varsome_api/vcf.py b/varsome_api/vcf.py index 752e726..ddbdac1 100755 --- a/varsome_api/vcf.py +++ b/varsome_api/vcf.py @@ -11,17 +11,30 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - - +import contextlib import os import time -import vcf from collections import OrderedDict -from vcf.parser import _Info + +import vcf +from vcf.parser import _Info, _encode_type + from varsome_api.client import VarSomeAPIClient from varsome_api.models.variant import AnnotatedVariant -__author__ = "ckopanos" + +@contextlib.contextmanager +def vcf_writer(*args, **kwargs): + writer = vcf.Writer(*args, **kwargs) + yield writer + writer.close() + + +@contextlib.contextmanager +def vcf_reader(*args, **kwargs): + reader = vcf.Reader(*args, **kwargs) + yield reader + reader._reader.close() class VCFAnnotator(VarSomeAPIClient): @@ -29,63 +42,95 @@ class VCFAnnotator(VarSomeAPIClient): VCFAnnotator will take an input vcf file parse it and produce an annotated vcf file """ - def __init__(self, api_key=None, - max_variants_per_batch=1000, ref_genome='hg19', get_parameters=None, max_threads=None): - super().__init__(api_key, max_variants_per_batch) + def __init__( + self, + api_key=None, + logger=None, + api_url=None, + max_variants_per_batch=1000, + ref_genome="hg19", + get_parameters=None, + max_threads=None, + ): + super().__init__(api_key, logger, api_url, max_variants_per_batch) self.ref_genome = ref_genome self.get_parameters = get_parameters - self.total_varialts = 0 + self.total_variants = 0 self.filtered_out_variants = 0 self.variants_with_errors = 0 self.max_threads = max_threads or 1 if self.max_variants_per_batch > 3000 and self.max_threads > 1: - self.logger.warning("Having more than 1 thread with more than 3000 variants per batch may not be optimal") + self.logger.warning( + "Having more than 1 thread with more than 3000 variants per batch may not be optimal" + ) - def _process_request(self, input_batch): + def _process_request(self, input_batch, writer): start = time.time() - api_results = self.batch_lookup(list(input_batch.keys()), params=self.get_parameters, - ref_genome=self.ref_genome, max_threads=self.max_threads) + input_batch_variants = list(input_batch.keys()) + api_results = self.batch_lookup( + input_batch_variants, + params=self.get_parameters, + ref_genome=self.ref_genome, + max_threads=self.max_threads, + ) duration = time.time() - start - self.logger.info('Annotated %s variants in %s' % (len(input_batch), duration)) - self.logger.info('Writing to output vcf file') - for i, requested_variant in enumerate(input_batch.keys()): + self.logger.info( + "Annotated %s variants from a source of %s in %s" + % (len(api_results), len(input_batch), duration) + ) + self.logger.info("Writing to output vcf file") + + for i, results in enumerate(api_results): try: - results = api_results[i] - record = input_batch[requested_variant] + record = input_batch[input_batch_variants[i]] if results: - if 'filtered_out' in results: - self.logger.info(results['filtered_out']) + if "filtered_out" in results: + self.logger.info( + "%s: %s" + % (input_batch_variants[i], results["filtered_out"]) + ) self.filtered_out_variants += 1 continue - if 'error' in results: - self.logger.error(results['error']) + if "error" in results: + self.logger.error( + "%s: %s" % (input_batch_variants[i], results["error"]) + ) self.variants_with_errors += 1 continue - if 'variant_id' in results: + if results.get("variant_id"): variant_result = AnnotatedVariant(**results) - record = self.annotate_record(record, variant_result) - self.vcf_writer.write_record(record) + record = self.annotate_record( + record, variant_result, input_batch_variants[i] + ) + writer.write_record(record) else: - self.logger.error(results) + self.logger.error("%s: %s" % (input_batch_variants[i], results)) self.variants_with_errors += 1 except Exception as e: - self.logger.error(e) + self.logger.error("Result set error %s, %s" % (e, results)) self.variants_with_errors += 1 pass # log an exception.. - def annotate_record(self, record, variant_result): + def annotate_record(self, record, variant_result, original_variant): """ Method to annotate a record. You should override this with your own implementation to include variant result properties you want in your output vcf :param record: vcf record object :param variant_result: AnnotatedVariant object + :param original_variant: The variant as present in the request :return: annotated record object """ - record.INFO['variant_id'] = variant_result.variant_id - record.INFO['gene'] = ",".join(variant_result.genes) - record.INFO['gnomad_exomes_AF'] = variant_result.gnomad_exomes_af - record.INFO['gnomad_genomes_AF'] = variant_result.gnomad_genomes_af - record.ALT = variant_result.alt + record.INFO["variant_id"] = variant_result.variant_id + record.INFO["gene"] = ",".join(variant_result.genes) + record.INFO["gnomad_exomes_AF"] = variant_result.gnomad_exomes_af + record.INFO["gnomad_genomes_AF"] = variant_result.gnomad_genomes_af + acmg_verdict = variant_result.acmg_verdict + if acmg_verdict is not None: + acmg_verdict = acmg_verdict.replace(" ", "_") + record.INFO["acmg_verdict"] = acmg_verdict + record.INFO["original_variant"] = original_variant + record.REF = variant_result.ref or "." + record.ALT = [variant_result.alt] record.POS = variant_result.pos record.ID = ";".join(variant_result.rs_ids) or "." return record @@ -98,12 +143,60 @@ def add_vcf_header_info(self, vcf_template): :param vcf_template: vcf reader object :return: """ - vcf_template.infos['variant_id'] = _Info('variant_id', 1, 'Integer', 'Saphetor variant identifier', None, None) - vcf_template.infos['gene'] = _Info('gene', '.', 'String', 'Genes related to this variant', None, None) - vcf_template.infos['gnomad_exomes_AF'] = _Info('gnomad_exomes_AF', '.', 'Float', - 'GnomAD exomes allele frequency value', None, None) - vcf_template.infos['gnomad_genomes_AF'] = _Info('gnomad_genomes_AF', '.', 'Float', - 'GnomAD genomes allele frequency value', None, None) + vcf_template.infos["variant_id"] = _Info( + "variant_id", + 1, + "Integer", + "Saphetor variant identifier", + None, + None, + _encode_type("Integer"), + ) + vcf_template.infos["gene"] = _Info( + "gene", + ".", + "String", + "Genes related to this variant", + None, + None, + _encode_type("String"), + ) + vcf_template.infos["gnomad_exomes_AF"] = _Info( + "gnomad_exomes_AF", + 1, + "Float", + "GnomAD exomes allele frequency value", + None, + None, + _encode_type("Float"), + ) + vcf_template.infos["gnomad_genomes_AF"] = _Info( + "gnomad_genomes_AF", + 1, + "Float", + "GnomAD genomes allele frequency value", + None, + None, + _encode_type("Float"), + ) + vcf_template.infos["acmg_verdict"] = _Info( + "acmg_verdict", + ".", + "String", + "ACMG Classification Verdict", + None, + None, + _encode_type("String"), + ) + vcf_template.infos["original_variant"] = _Info( + "original_variant", + ".", + "String", + "Variant as present in the request", + None, + None, + _encode_type("String"), + ) def annotate(self, input_vcf_file, output_vcf_file=None, template=None, **kwargs): """ @@ -116,32 +209,49 @@ def annotate(self, input_vcf_file, output_vcf_file=None, template=None, **kwargs """ annotations_start = time.time() if not os.path.isfile(input_vcf_file): - raise FileNotFoundError('%s does not exist' % input_vcf_file) + raise FileNotFoundError("%s does not exist" % input_vcf_file) if output_vcf_file is None: output_vcf_file = "%s.annotated.vcf" % input_vcf_file - vcf_reader = vcf.Reader(filename=input_vcf_file, strict_whitespace=kwargs.get('strict_whitespace', True)) - vcf_template = vcf_reader if template is None else vcf.Reader(filename=template, strict_whitespace=kwargs.get( - 'strict_whitespace', True)) - self.add_vcf_header_info(vcf_template) - self.vcf_writer = vcf.Writer(open(output_vcf_file, 'w'), vcf_template) - input_batch = OrderedDict() - # this will keep the request queue large enough so that parallel requests will not stop executing - batch_limit = self.max_variants_per_batch * self.max_threads * 2 - for record in vcf_reader: - for alt_seq in record.ALT: - requested_variant = "%s:%s:%s:%s" % (record.CHROM, record.POS, record.REF or "", alt_seq or "") - input_batch[requested_variant] = record - self.total_varialts += 1 - if len(input_batch) < batch_limit: - continue - self._process_request(input_batch) - # reset input batch - input_batch = OrderedDict() - # we may have some variants remaining if input batch is less than batch size - if len(input_batch) > 0: - self._process_request(input_batch) - self.vcf_writer.close() - self.logger.info("Annotating %s variants in %s. " - "Filtered out %s. " - "Errors %s" % (self.total_varialts, time.time() - annotations_start, - self.filtered_out_variants, self.variants_with_errors)) + if template is None: + template = input_vcf_file + with vcf_reader( + filename=input_vcf_file, + strict_whitespace=kwargs.get("strict_whitespace", True), + ) as reader: + with vcf_reader( + filename=template, + strict_whitespace=kwargs.get("strict_whitespace", True), + ) as vcf_template: + self.add_vcf_header_info(vcf_template) + with vcf_writer(open(output_vcf_file, "w"), vcf_template) as writer: + input_batch = OrderedDict() + batch_limit = self.max_variants_per_batch * self.max_threads * 2 + for record in reader: + for alt_seq in record.ALT: + requested_variant = "%s:%s:%s:%s" % ( + record.CHROM, + record.POS, + record.REF or "", + alt_seq or "", + ) + input_batch[requested_variant] = record + self.total_variants += 1 + if len(input_batch) < batch_limit: + continue + self._process_request(input_batch, writer) + # reset input batch + input_batch = OrderedDict() + # we may have some variants remaining if input batch is less than batch size + if len(input_batch) > 0: + self._process_request(input_batch, writer) + self.logger.info( + "Annotating %s variants in %s. " + "Filtered out %s. " + "Errors %s" + % ( + self.total_variants, + time.time() - annotations_start, + self.filtered_out_variants, + self.variants_with_errors, + ) + )