Skip to content

Commit 3efdfe0

Browse files
committed
Added a base recipe for Llama3.1-405b experiments and a recipe for the Llama3.1-405b dataset.
1 parent 72fd5a8 commit 3efdfe0

File tree

4 files changed

+159
-1
lines changed

4 files changed

+159
-1
lines changed
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
import json
2+
3+
from transformers import AutoTokenizer
4+
5+
def get_accuracy_dict(accuracy_dict_full):
    """Filter a full accuracy report down to the metrics of interest.

    Parameters:
        accuracy_dict_full: mapping of metric name -> value, as produced by
            the MLPerf accuracy evaluation script.

    Returns:
        A new dict containing only the "rougeL", "exact_match" and
        "tokens_per_sample" entries that are present in the input; all other
        keys are dropped.
    """
    # Set membership + dict comprehension instead of a manual key loop with a
    # list-membership test; behavior is unchanged.
    wanted = {"rougeL", "exact_match", "tokens_per_sample"}
    return {k: v for k, v in accuracy_dict_full.items() if k in wanted}
11+
12+
def parse_tokens(
    tokenised_accuracy_log_path: str, output_log_path: str
):
    """Decode the hex-encoded token streams of an accuracy log into integers.

    Each log entry's "data" field is a hex string in which every 8 hex
    characters encode one 4-byte little-endian unsigned integer.

    Parameters:
        tokenised_accuracy_log_path: path to the JSON accuracy log (a list of
            objects, each with a hex "data" field).
        output_log_path: path where the decoded list-of-integer-lists is
            written as indented JSON.

    Returns:
        output_log_path, for convenient chaining.
    """
    with open(tokenised_accuracy_log_path) as log_file:
        entries = json.load(log_file)

    decoded = []
    for entry in entries:
        blob = entry["data"]
        # Walk the hex string in 8-character (4-byte) groups, decoding each
        # group as a little-endian unsigned integer.
        token_values = [
            int.from_bytes(bytes.fromhex(blob[pos : pos + 8]), byteorder="little")
            for pos in range(0, len(blob), 8)
        ]
        decoded.append(token_values)

    with open(output_log_path, "w") as out_file:
        json.dump(decoded, out_file, indent=2)
    return output_log_path
30+
31+
def detokenise(
    checkpoint_path: str, tokenised_accuracy_log_path: str, output_log_path: str
):
    """Decode a tokenised accuracy log back into text with a HF tokeniser.

    Parameters:
        checkpoint_path: location understood by
            transformers.AutoTokenizer.from_pretrained.
        tokenised_accuracy_log_path: path to the JSON accuracy log; each
            entry carries "seq_id", "qsl_idx", "token_count" and a hex
            "data" field (8 hex characters per 4-byte little-endian token).
        output_log_path: path where the detokenised JSON log is written.

    Returns:
        output_log_path, for convenient chaining.
    """
    tokeniser = AutoTokenizer.from_pretrained(checkpoint_path)

    with open(tokenised_accuracy_log_path) as log_file:
        entries = json.load(log_file)

    detokenised = []
    for entry in entries:
        blob = entry["data"]
        # Decode each 8-hex-character (4-byte) group as a little-endian
        # unsigned integer token id, then let the tokeniser render text.
        token_ids = [
            int.from_bytes(bytes.fromhex(blob[pos : pos + 8]), byteorder="little")
            for pos in range(0, len(blob), 8)
        ]
        detokenised.append({
            "seq_id" : entry["seq_id"],
            "qsl_idx" : entry["qsl_idx"],
            "data": tokeniser.decode(token_ids),
            "token_count" : entry["token_count"]
        })

    with open(output_log_path, "w") as out_file:
        json.dump(detokenised, out_file, indent=2)
    return output_log_path
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
{
2+
"_parent_entries": [ [ "^", "byname", "base_loadgen_experiment" ] ],
3+
4+
"transformers_query": [ "python_package", "package_name=transformers", ["desired_python_version", ["^", "kernel_python_major_dot_minor"]] ],
5+
6+
"_BEFORE_CODE_LOADING": [ "^^", "execute", [[
7+
[ "get_kernel" ],
8+
[ "byquery", [[ "^^", "get", "transformers_query" ]] ],
9+
[ "use" ]
10+
]] ],
11+
12+
"desired_python_version": "3.8",
13+
14+
"mlperf_inference_git_entry": [ "^", "byquery", "git_repo,repo_name=mlperf_inference_git" ],
15+
16+
"abs_script_path": [ "^^", "execute", [[
17+
[ "get", "mlperf_inference_git_entry" ],
18+
[ "get_path_of", "llama3_1_accuracy_script" ]
19+
]] ],
20+
21+
"accuracy_log_path": ["^^", "get_path", "mlperf_log_accuracy.json"],
22+
23+
"dataset_name": "llrg",
24+
"dataset_query": [ "downloaded", [ "^^", "substitute", "dataset_name=#{dataset_name}#" ]],
25+
"dataset_entry": [ "^", "byquery", [[ "^^", "get", "dataset_query" ]], {}, ["dataset_query"] ],
26+
27+
"dataset_path": [ "^^", "execute", [[
28+
[ "get", "dataset_entry" ],
29+
[ "get_path" ],
30+
[ "__add__", "/mlperf_llama3.1_405b_dataset_8313_processed_fp16_eval.pkl" ]
31+
]] ],
32+
33+
"model_family": "llama3_1",
34+
"model_variant": "405b",
35+
"variant": [ "^^", "get", "model_variant" ],
36+
"checkpoint_path_query": [ "^^", "substitute", "downloaded,hf_tokeniser,model_family=#{model_family}#,variant=#{variant}#" ],
37+
"checkpoint_path": [ "^^", "execute", [[
38+
[ "get_kernel" ],
39+
[ "byquery", [[ "^^", "get", "checkpoint_path_query" ]] ],
40+
[ "get_path" ]
41+
]] ],
42+
43+
"accuracy_log_dtype": "int32",
44+
45+
"extract_accuracy_report": [ "^^", "execute", [[
46+
[ "get_kernel" ],
47+
[ "byname", "python_script" ],
48+
[ "run", [], {
49+
"python_deps": [
50+
[ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=protobuf" ],
51+
[ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=torch" ],
52+
[ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=transformers" ],
53+
[ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=nltk" ],
54+
[ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=rouge_score" ],
55+
[ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=sentencepiece" ],
56+
[ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=pillow" ],
57+
[ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=evaluate" ]
58+
],
59+
"abs_script_path": ["^^", "get", "abs_script_path"],
60+
"script_extra_params": [ "^^", "substitute", "--mlperf-accuracy-file #{accuracy_log_path}# --dataset-file #{dataset_path}# --dtype #{accuracy_log_dtype}# --checkpoint-path #{checkpoint_path}#" ],
61+
"desired_python_version": ["^", "kernel_python_major_dot_minor"],
62+
"capture_output": true
63+
} ],
64+
0,
65+
[ "func", [ "ufun.rematch", "(\\{.*\\})" ] ],
66+
0,
67+
[ "denumpify_dict" ],
68+
0,
69+
[ "func", "str" ]
70+
]], {} ],
71+
72+
"accuracy_dict_full": [ "^^", "execute", [[
73+
["get", "accuracy_report" ],
74+
0,
75+
[ "func", "eval" ]
76+
]], {} ],
77+
"accuracy_dict": [ "^^", "get_accuracy_dict" ],
78+
"rougeL": [ "^^" , "dig","accuracy_dict.rougeL" ],
79+
"exact_match": [ "^^" , "dig","accuracy_dict.exact_match" ],
80+
"tokens_per_sample": [ "^^" , "dig","accuracy_dict.tokens_per_sample" ],
81+
82+
"accuracy_range_dict": { "rougeL": [ 21.449934, null ], "exact_match": [ 89.232165, null ], "tokens_per_sample": [ 616.212, null ] },
83+
84+
"tokenised_accuracy_log_path": [ "^^", "get_path", "mlperf_log_accuracy.json" ],
85+
"output_log_path": [ "^^", "get_path", "detokenised_mlperf_log.json" ],
86+
87+
"detokenised_log": [ "^^", "detokenise" ]
88+
}

data_axs.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,9 @@
6464
"model_training_llama2_recipe": "model_training_llama2_recipe",
6565
"dataset_scrolls_gov_report_8k_recipe": "dataset_scrolls_gov_report_8k_recipe",
6666
"rclone_mlc_llama2_config": "rclone_mlc_llama2_config",
67-
"explore_recipe": "explore_recipe"
67+
"explore_recipe": "explore_recipe",
68+
"base_llama3_1_loadgen_experiment": "base_llama3_1_loadgen_experiment",
69+
"dataset_llrg_mlperf_recipe": "dataset_llrg_mlperf_recipe"
6870
},
6971
"repo_name": "axs2mlperf",
7072
"submodules": false
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
"_producer_rules": [
3+
[ [ "downloaded", "dataset_name=llrg", "model_name=llama3_1", "variant=405b" ], [["get_kernel"],["byname","downloader"],["download"]], {
4+
"downloading_tool_query": "shell_tool,can_download_url_from_rclone",
5+
"url": "mlc_inference:mlcommons-inference-wg-public/llama3.1_405b",
6+
"downloading_tool_params": {
7+
"rclone_remote_name": "mlc_inference"
8+
},
9+
"newborn_entry_name": "downloaded_mlc_llrg",
10+
"file_path": "mlperf_llama3.1_405b_dataset_8313_processed_fp16_eval.pkl"
11+
}, [] ]
12+
]
13+
}

0 commit comments

Comments
 (0)