From 6135248cef48c3459c78ca2d91f9ce4d17e97e09 Mon Sep 17 00:00:00 2001
From: FanhaiLu1 <fanhai@google.com>
Date: Thu, 25 Apr 2024 22:13:50 +0000
Subject: [PATCH 1/2] Fix float division by zero in benchmark

---
 benchmarks/benchmark_serving.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py
index 11ca8acc..790e5b0b 100644
--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@@ -338,6 +338,10 @@ def calculate_metrics(
       )
       total_output += output_len
       total_input += input_requests[i].prompt_len
+      if output_len == 0:
+        print(f"""-------- output_len is zero for {i}th request:,
+             output: {outputs[i]}""")
+        continue
       per_token_latencies.append(outputs[i].latency / output_len)
       ttfts.append(outputs[i].ttft)
       completed += 1

From 1e68c7d07c863a84d7fbbb0c4b24686e38bdbbd4 Mon Sep 17 00:00:00 2001
From: FanhaiLu1 <fanhai@google.com>
Date: Thu, 25 Apr 2024 22:19:52 +0000
Subject: [PATCH 2/2] Fix formatting of the zero output_len print call in benchmark

---
 benchmarks/benchmark_serving.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py
index 790e5b0b..7f5efe32 100644
--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@@ -339,8 +339,10 @@ def calculate_metrics(
       total_output += output_len
       total_input += input_requests[i].prompt_len
       if output_len == 0:
-        print(f"""-------- output_len is zero for {i}th request:,
-             output: {outputs[i]}""")
+        print(
+            f"""-------- output_len is zero for {i}th request:,
+             output: {outputs[i]}"""
+        )
         continue
       per_token_latencies.append(outputs[i].latency / output_len)
       ttfts.append(outputs[i].ttft)