Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,9 @@ You may wish to catch this exception and proceed with your own code logic
# proceed with your code flow e.g.
print(e) # 404 (invalid reference genome)

### Example Usage
## Example Command Line Usage

### Single Variants or Small Batches

After installing the package, you may download and run the `run.py` script
to test the API client directly, e.g.
Expand All @@ -75,6 +76,20 @@ Run

for a list of available options

### Large Batches

To retrieve information for large batches of variants, use `batchRequestClient.py`.
It accepts a text file with one variant per line and writes the results to a JSON file, e.g.

./batchRequestClient.py -i vars.txt -o test.json -k 'your token' -n 10

Run

./batchRequestClient.py -h

for a list of available options

## Reference
To view the available request parameters (passed via the `params` argument of the client methods), refer to the examples at [api.varsome.com](https://api.varsome.com) or
the [API documentation](http://docs.varsome.apiary.io).

Expand Down
138 changes: 83 additions & 55 deletions batchRequestClient.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@

# A simple client application that does the following:
# - Loads a text file containing one variant per row
# - Performs a batch lookups to the Saphetor Variant API using N variants at a time.
# - Performs batch lookups against the Saphetor Variant API, n variants at a time.
# - Saves the results in a new file.
#
# It uses the following module:
# variantapi.client (https://github.com/saphetor/variant-api-client-python)
#
# Note: To sort output json file execute:
# variantapi.client
# https://github.com/saphetor/variant-api-client-python
#
# Note:
# To sort output json file execute:
# jq -S '.' output.txt > output_sorted.txt

import argparse
Expand All @@ -22,64 +24,90 @@

__author__ = 'stephanos-androutsellis'

# Declare reference genome as a global variable
_ref_genome = 1019


def main(argv):
# Read and parse arguments
infile = ''
outfile = ''
batch_size = 5000

parser = argparse.ArgumentParser(description='Simple batch lookup Client application')
parser.add_argument('-i', help='Input file', type=str, metavar='Input File', required=True)
parser.add_argument('-o', help='Output file', type=str, metavar='Output File', required=True)
parser.add_argument('-n', help="Number of variants to batch", type=int, metavar='Batch size', required=True,default=5000)
parser.add_argument('-k', help='Your key to the API', type=str, metavar='API Key', required=False)
parser.add_argument('-g', help='Reference genome either 1019 (default) or 1038', type=int,
metavar='Reference Genome', required=False, default=1019)

args = parser.parse_args()
infile = args.i
outfile = args.o
batch_size = args.n if args.n is not None else batch_size
api_key = args.k
ref_genome = args.g if args.g is not None else _ref_genome
infile = ''
outfile = ''

# Open and load input file into list
print("Reading input file ", infile)
with open(infile) as fi:
variants = fi.readlines()
variants = [v.strip('\n') for v in variants]
parser = argparse.ArgumentParser(
description='Simple batch lookup Client application. '
)
parser.add_argument('-i',
help='Input file',
type=str,
metavar='Input File',
required=True
)
parser.add_argument('-o',
help='Output file',
type=str,
metavar='Output File',
required=True
)
parser.add_argument('-n',
help="Number of variants per GET request",
type=int,
metavar='Batch size',
required=False,
default=10000
)
parser.add_argument('-k',
help='Your key to the API',
type=str,
metavar='API Key',
required=False
)
parser.add_argument('-g',
help='Reference genome either hg19 (default) or hg38',
type=str,
metavar='Reference Genome',
required=False,
default='hg19'
)
parser.add_argument('-p',
help='Request parameters '
'e.g. add-all-data=1 expand-pubmed-articles=0',
type=str,
metavar='Request Params',
required=False,
nargs='+'
)

# Prepare output for writing.
print("Opening output file ", outfile)
fo = open(outfile,'w')
args = parser.parse_args()
infile = args.i
outfile = args.o
batch_size = args.n
api_key = args.k
ref_genome = args.g
request_parameters = None
if args.p:
request_parameters = {param[0]: param[1] for param in [
param.split("=") for param in args.p
]
}

# Initialize client connection to API
api = VariantAPIClient(api_key)
if (api is None):
print("Failed to connect to API")
sys.exit()
# Open and load input file into list
print("Reading input file ", infile)
with open(infile) as fi:
variants = fi.readlines()
variants = [v.strip('\n') for v in variants]

batch_counter = 0
finished = False
while not finished:
start_index = batch_counter*batch_size
end_index = (batch_counter+1)*batch_size
if (end_index > len(variants)):
end_index = len(variants)
finished = True
print(start_index, ":", end_index-1)
batch_variants = variants[start_index:end_index]
print("Lookup for: ", batch_variants, "with ref_genome= ", ref_genome)
batch_data = api.batch_lookup(batch_variants, ref_genome=ref_genome)
# Initialize client connection to API
api = VariantAPIClient(api_key, max_variants_per_batch=batch_size)
if (api is None):
print("Failed to connect to API")
sys.exit()

fo.write(json.dumps(batch_data, indent=2))
batch_counter += 1
print("posting GET requests... ", end='')
results = api.batch_lookup(
variants,
params=request_parameters,
ref_genome=ref_genome
)
print("done")

print ("Finished ", batch_counter, " batch lookups")
print("writing output file ", outfile)
with open (outfile, 'w') as fo:
fo.write(json.dumps(results, indent=4))

if __name__ == '__main__':
main(argv)
46 changes: 36 additions & 10 deletions run.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,29 +14,55 @@

def main(argv):
parser = argparse.ArgumentParser(description='Sample Variant API calls')
parser.add_argument('-k', help='Your key to the API', type=str, metavar='API Key', required=False)
parser.add_argument('-g', help='Reference genome either hg19 or hg38', type=str, metavar='Reference Genome',
required=False, default='hg19')
parser.add_argument('-k',
help='Your key to the API',
type=str,
metavar='API Key',
required=False
)
parser.add_argument('-g',
help='Reference genome either hg19 or hg38',
type=str,
metavar='Reference Genome',
required=False,
default='hg19'
)
parser.add_argument('-q',
help='Query to lookup in the API e.g. chr19:20082943:1:G or in case of batch request '
'e.g. chr19:20082943:1:G rs113488022',
type=str, metavar='Query', required=True, nargs='+')
help='Query to lookup in the API e.g. chr19:20082943:1:G '
'or in case of batch request '
'e.g. chr19:20082943:1:G rs113488022',
type=str,
metavar='Query',
required=True,
nargs='+'
)
parser.add_argument('-p',
help='Request parameters e.g. add-all-data=1 expand-pubmed-articles=0',
type=str, metavar='Request Params', required=False, nargs='+')
help='Request parameters '
'e.g. add-all-data=1 expand-pubmed-articles=0',
type=str,
metavar='Request Params',
required=False,
nargs='+'
)
args = parser.parse_args()
api_key = args.k
query = args.q
ref_genome = args.g
request_parameters = None
if args.p:
request_parameters = {param[0]: param[1] for param in [param.split("=") for param in args.p]}
request_parameters = {param[0]: param[1] for param in [
param.split("=") for param in args.p
]
}

api = VariantAPIClient(api_key)

if len(query) == 1:
result = api.lookup(query[0], params=request_parameters, ref_genome=ref_genome)
else:
if api_key is None:
sys.exit("You need to pass an api key to perform batch requests")
sys.exit("You need to pass an api key to perform batch requests"
"consider using batchRequestClient.py for large batch lookups")
result = api.batch_lookup(query, params=request_parameters, ref_genome=ref_genome)
sys.stdout.write(json.dumps(result, indent=4, sort_keys=True) if result else "No result")
sys.stdout.write("\n")
Expand Down
15 changes: 10 additions & 5 deletions variantapi/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ class VariantAPIClient(VariantAPIClientBase):
lookup_path = "/lookup/%s/%s"
batch_lookup_path = "/lookup/batch/%s"

def __init__(self, api_key=None, max_variants_per_batch=200):
def __init__(self, api_key=None, max_variants_per_batch=10000):
super(VariantAPIClient, self).__init__(api_key)
self.max_variants_per_batch = max_variants_per_batch

Expand All @@ -106,7 +106,10 @@ def lookup(self, query, params=None, ref_genome='hg19'):
return self.get(self.lookup_path % (query, ref_genome), params=params)

def batch_lookup(self, variants, params=None, ref_genome='hg19'):
"""
"""return list of query results for all variants.

Splits variants into chunks of size max_variants_per_batch and sends one POST request per chunk,
returning the combined results of all chunks.

:param variants: list of variant representations
:param params: dictionary of key value pairs for http GET parameters. Refer to the api documentation
Expand All @@ -115,9 +118,11 @@ def batch_lookup(self, variants, params=None, ref_genome='hg19'):
:return: list of dictionaries with annotations per variant refer to https://api.varsome.com/lookup/schema
for dictionary properties
"""
n = self.max_variants_per_batch
chunks = [variants[i:i+n] for i in range(0, len(variants), n)]

results = []
for queries in [variants[x:x + self.max_variants_per_batch] for x in range(0, len(variants),
self.max_variants_per_batch)]:
data = self.post(self.batch_lookup_path % ref_genome, params=params, json_data={'variants': queries})
for chunk in chunks:
data = self.post(self.batch_lookup_path % ref_genome, params=params, json_data={'variants': chunk})
results.extend(data)
return results