|
| 1 | +import argparse |
| 2 | +import torch |
| 3 | +import gradio as gr |
| 4 | +import json |
| 5 | +from datetime import datetime |
| 6 | +from transformers import AutoModelForCausalLM, AutoTokenizer,GenerationConfig |
| 7 | + |
# Module-level handles for the tokenizer and model; populated by init_model()
# at startup and read by chat().
tokenizer, model = None, None
| 9 | + |
def init_model(args):
    """Load the tokenizer and model into the module-level globals.

    Args:
        args: parsed CLI namespace providing ``tokenizer_path`` and
            ``model_path``.
    """
    global tokenizer, model
    # Left-side truncation/padding keeps the most recent turns when the
    # prompt exceeds the context window.
    tokenizer = AutoTokenizer.from_pretrained(
        args.tokenizer_path,
        truncation_side="left",
        padding_side="left",
    )
    # bfloat16 + device_map='auto' shards the weights across available devices.
    model = AutoModelForCausalLM.from_pretrained(
        args.model_path,
        trust_remote_code=True,
        torch_dtype=torch.bfloat16,
        low_cpu_mem_usage=True,
        device_map='auto',
    ).eval()
    model.generation_config = GenerationConfig.from_pretrained(args.model_path)
| 17 | + |
def chat(message, history, request: gr.Request):
    """Stream a model reply for *message*, yielding updated chat state.

    Args:
        message: the new user utterance.
        history: role-tagged turn list (``{"role", "content"}`` dicts) or None.
        request: gradio request object, used only for logging the client IP.

    Yields:
        (utter_history, history) — the [user, assistant] pair list for the
        Chatbot widget, and the role-tagged state list.
    """
    global tokenizer, model
    history = history or []
    history.append({"role": "user", "content": message})

    # Placeholder assistant turn, filled in as tokens stream back.
    history.append({"role": "assistant", "content": ""})

    # Re-shape the role-tagged history into [user, assistant] pairs
    # for the gradio Chatbot component.
    utter_history = [
        [user_turn["content"], bot_turn["content"]]
        for user_turn, bot_turn in zip(history[::2], history[1::2])
    ]

    # Stream the generation; history[:-1] excludes the empty placeholder
    # from the prompt sent to the model.
    for next_text in model.chat(tokenizer, history[:-1], stream=True):
        utter_history[-1][1] += next_text
        history[-1]["content"] += next_text
        # On Apple Silicon, release cached MPS memory between steps.
        if torch.backends.mps.is_available():
            torch.mps.empty_cache()
        yield utter_history, history

    # Log the completed exchange with client IP and timestamp.
    current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print(f'{current_time} request_ip:{request.client.host}\nquery: {message}\nhistory: {json.dumps(history, ensure_ascii=False)}\nanswer: {json.dumps(utter_history[-1][1], ensure_ascii=False)}')
| 40 | + |
def get_args(argv=None):
    """Parse command-line options for the demo server.

    Args:
        argv: optional argument list; defaults to ``sys.argv[1:]`` when None
            (backward-compatible with the original zero-argument call).

    Returns:
        argparse.Namespace with ``port``, ``title``, ``model_path`` and
        ``tokenizer_path``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--port", type=int, default=36000,
                        help="server port")
    parser.add_argument("--title", type=str, default="XVERSE-13B-Chat",
                        help="server title")
    # Fixed typo: default was "./VERSE-13B-Chat" (missing leading "X"),
    # inconsistent with --tokenizer_path and the default title.
    parser.add_argument("--model_path", type=str, default="./XVERSE-13B-Chat",
                        help="model path")
    parser.add_argument("--tokenizer_path", type=str, default="./XVERSE-13B-Chat",
                        help="Path to the tokenizer.")
    args = parser.parse_args(argv)
    return args
| 53 | + |
if __name__ == "__main__":
    args = get_args()
    # Initialize the global tokenizer/model before building the UI.
    init_model(args)

    # Build the demo app.
    with gr.Blocks() as demo:
        gr.Markdown("# <center>{}</center>".format(args.title))
        # NOTE(review): .style() is a legacy gradio API (pre-4.x) — this file
        # appears pinned to an older gradio version; confirm before upgrading.
        chatbot = gr.Chatbot(label="Chat history", height=650).style(color_map=("green", "pink"))
        # Server-side chat state: role-tagged turn list consumed by chat().
        state = gr.State([])

        with gr.Row():
            text_box = gr.Textbox(label="Message", show_label=False, placeholder="Enter message and press enter").style(container=False)

        with gr.Row():
            submit_btn = gr.Button(value="Send", variant="secondary")
            reset_btn = gr.Button(value="Reset")

        # Both Enter in the textbox and the Send button trigger chat();
        # chat() is a generator, so the Chatbot updates as tokens stream in.
        text_box.submit(fn=chat,
                        inputs=[text_box, state],
                        outputs=[chatbot, state],
                        api_name="chat")
        submit_btn.click(fn=chat,
                         inputs=[text_box, state],
                         outputs=[chatbot, state])

        # Clear the text box after a message is sent.
        def clear_textbox():
            return gr.update(value="")
        text_box.submit(fn=clear_textbox, inputs=None, outputs=[text_box])
        submit_btn.click(fn=clear_textbox, inputs=None, outputs=[text_box])

        # Clear the chat display and reset the state.
        def reset():
            return None, []
        reset_btn.click(fn=reset, inputs=None, outputs=[chatbot, state])

    # Allow up to 4 concurrent generation requests; bind on all interfaces.
    demo.queue(concurrency_count=4)
    demo.launch(server_name="0.0.0.0", server_port=args.port)
0 commit comments