Skip to content

Commit 31b7b6c

Browse files
authored
Remove deprecated tests/infer_data_path.py (NVIDIA-NeMo#11997)
Signed-off-by: Jan Lasek <janek.lasek@gmail.com>
1 parent 25eb7aa commit 31b7b6c

File tree

5 files changed: +76 additions, -542 deletions

.secrets.baseline

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1229,15 +1229,6 @@
12291229
"line_number": 2
12301230
}
12311231
],
1232-
"tests/infer_data_path.py": [
1233-
{
1234-
"type": "Base64 High Entropy String",
1235-
"filename": "tests/infer_data_path.py",
1236-
"hashed_secret": "8e0937151cfd9750db688fbe66be37d0c53ed6ab",
1237-
"is_verified": false,
1238-
"line_number": 63
1239-
}
1240-
],
12411232
"tutorials/asr/Multilang_ASR.ipynb": [
12421233
{
12431234
"type": "Hex High Entropy String",

tests/deploy/nemo_deploy.py

Lines changed: 34 additions & 145 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
import torch
2323

2424
from nemo.deploy.nlp.megatronllm_deployable import MegatronLLMDeployable
25-
from tests.infer_data_path import get_infer_test_data
2625

2726
run_export_tests = True
2827
try:
@@ -353,88 +352,6 @@ def test_cpp_runtime(
353352
print("")
354353

355354

356-
def run_existing_checkpoints(
357-
model_name,
358-
n_gpus,
359-
tp_size=None,
360-
pp_size=None,
361-
ptuning=False,
362-
lora=False,
363-
streaming=False,
364-
run_accuracy=False,
365-
test_deployment=False,
366-
stop_words_list=None,
367-
test_data_path=None,
368-
backend="tensorrt-llm",
369-
save_engine=False,
370-
):
371-
if n_gpus > torch.cuda.device_count():
372-
print("Skipping the test due to not enough number of GPUs")
373-
return None, None, None, None, None
374-
375-
test_data = get_infer_test_data()
376-
if not (model_name in test_data.keys()):
377-
raise Exception("Model {0} is not supported.".format(model_name))
378-
379-
model_info = test_data[model_name]
380-
381-
if n_gpus < model_info.min_gpus:
382-
print("Min n_gpus for this model is {0}".format(n_gpus))
383-
return None, None, None, None, None
384-
385-
if ptuning and model_info.p_tuning_checkpoint is None:
386-
raise Exception("There is not ptuning checkpoint path defined.")
387-
388-
if lora and model_info.lora_checkpoint is None:
389-
raise Exception("There is not lora checkpoint path defined.")
390-
391-
if model_info.model_type == "gemma":
392-
print("*********************")
393-
use_embedding_sharing = True
394-
else:
395-
use_embedding_sharing = False
396-
397-
if backend == "in-framework":
398-
return run_in_framework_inference(
399-
model_name=model_name,
400-
prompt=model_info.prompt_template,
401-
checkpoint_path=model_info.checkpoint,
402-
max_batch_size=model_info.max_batch_size,
403-
max_input_len=None,
404-
max_output_len=model_info.max_output_len,
405-
)
406-
else:
407-
return run_trt_llm_inference(
408-
model_name=model_name,
409-
model_type=model_info.model_type,
410-
prompt=model_info.prompt_template,
411-
checkpoint_path=model_info.checkpoint,
412-
trt_llm_model_dir=model_info.trt_llm_model_dir,
413-
n_gpu=n_gpus,
414-
max_batch_size=model_info.max_batch_size,
415-
use_embedding_sharing=use_embedding_sharing,
416-
max_input_len=512,
417-
max_output_len=model_info.max_output_len,
418-
max_num_tokens=None,
419-
ptuning=ptuning,
420-
p_tuning_checkpoint=model_info.p_tuning_checkpoint,
421-
lora=lora,
422-
lora_checkpoint=model_info.lora_checkpoint,
423-
tp_size=tp_size,
424-
pp_size=pp_size,
425-
top_k=1,
426-
top_p=0.0,
427-
temperature=1.0,
428-
run_accuracy=run_accuracy,
429-
debug=True,
430-
streaming=streaming,
431-
stop_words_list=stop_words_list,
432-
test_deployment=test_deployment,
433-
test_data_path=test_data_path,
434-
save_engine=save_engine,
435-
)
436-
437-
438355
def get_args():
439356
parser = argparse.ArgumentParser(
440357
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
@@ -445,11 +362,6 @@ def get_args():
445362
type=str,
446363
required=True,
447364
)
448-
parser.add_argument(
449-
"--existing_test_models",
450-
default=False,
451-
action='store_true',
452-
)
453365
parser.add_argument(
454366
"--model_type",
455367
type=str,
@@ -602,75 +514,52 @@ def run_inference_tests(args):
602514

603515
result_dic = {}
604516

605-
if args.existing_test_models:
606-
n_gpus = args.min_gpus
607-
if args.max_gpus is None:
608-
args.max_gpus = args.min_gpus
517+
prompt_template = ["The capital of France is", "Largest animal in the sea is"]
518+
n_gpus = args.min_gpus
519+
if args.max_gpus is None:
520+
args.max_gpus = args.min_gpus
609521

610-
while n_gpus <= args.max_gpus:
611-
result_dic[n_gpus] = run_existing_checkpoints(
522+
while n_gpus <= args.max_gpus:
523+
if args.backend.lower() == "tensorrt-llm":
524+
result_dic[n_gpus] = run_trt_llm_inference(
612525
model_name=args.model_name,
613-
n_gpus=n_gpus,
526+
model_type=args.model_type,
527+
prompt=prompt_template,
528+
checkpoint_path=args.checkpoint_dir,
529+
trt_llm_model_dir=args.trt_llm_model_dir,
530+
n_gpu=n_gpus,
531+
max_batch_size=args.max_batch_size,
532+
max_input_len=args.max_input_len,
533+
max_output_len=args.max_output_len,
534+
max_num_tokens=args.max_num_tokens,
614535
ptuning=args.ptuning,
536+
p_tuning_checkpoint=args.p_tuning_checkpoint,
615537
lora=args.lora,
538+
lora_checkpoint=args.lora_checkpoint,
616539
tp_size=args.tp_size,
617540
pp_size=args.pp_size,
541+
top_k=args.top_k,
542+
top_p=args.top_p,
543+
temperature=args.temperature,
544+
run_accuracy=args.run_accuracy,
545+
debug=args.debug,
618546
streaming=args.streaming,
619547
test_deployment=args.test_deployment,
620-
run_accuracy=args.run_accuracy,
621548
test_data_path=args.test_data_path,
622-
backend=args.backend.lower(),
623549
save_engine=args.save_engine,
624550
)
551+
else:
552+
result_dic[n_gpus] = run_in_framework_inference(
553+
model_name=args.model_name,
554+
prompt=prompt_template,
555+
checkpoint_path=args.checkpoint_dir,
556+
n_gpu=n_gpus,
557+
max_batch_size=args.max_batch_size,
558+
max_input_len=args.max_input_len,
559+
max_output_len=args.max_output_len,
560+
)
625561

626-
n_gpus = n_gpus * 2
627-
else:
628-
prompt_template = ["The capital of France is", "Largest animal in the sea is"]
629-
n_gpus = args.min_gpus
630-
if args.max_gpus is None:
631-
args.max_gpus = args.min_gpus
632-
633-
while n_gpus <= args.max_gpus:
634-
if args.backend.lower() == "tensorrt-llm":
635-
result_dic[n_gpus] = run_trt_llm_inference(
636-
model_name=args.model_name,
637-
model_type=args.model_type,
638-
prompt=prompt_template,
639-
checkpoint_path=args.checkpoint_dir,
640-
trt_llm_model_dir=args.trt_llm_model_dir,
641-
n_gpu=n_gpus,
642-
max_batch_size=args.max_batch_size,
643-
max_input_len=args.max_input_len,
644-
max_output_len=args.max_output_len,
645-
max_num_tokens=args.max_num_tokens,
646-
ptuning=args.ptuning,
647-
p_tuning_checkpoint=args.p_tuning_checkpoint,
648-
lora=args.lora,
649-
lora_checkpoint=args.lora_checkpoint,
650-
tp_size=args.tp_size,
651-
pp_size=args.pp_size,
652-
top_k=args.top_k,
653-
top_p=args.top_p,
654-
temperature=args.temperature,
655-
run_accuracy=args.run_accuracy,
656-
debug=args.debug,
657-
streaming=args.streaming,
658-
test_deployment=args.test_deployment,
659-
test_data_path=args.test_data_path,
660-
save_engine=args.save_engine,
661-
)
662-
else:
663-
result_dic[n_gpus] = run_in_framework_inference(
664-
model_name=args.model_name,
665-
prompt=prompt_template,
666-
checkpoint_path=args.checkpoint_dir,
667-
n_gpu=n_gpus,
668-
max_batch_size=args.max_batch_size,
669-
max_input_len=args.max_input_len,
670-
max_output_len=args.max_output_len,
671-
)
672-
673-
n_gpus = n_gpus * 2
562+
n_gpus = n_gpus * 2
674563

675564
test_result = "PASS"
676565
print_separator = False

0 commit comments

Comments (0)