Reminder
System Info
- llamafactory version: 0.8.3.dev0
- Platform: Linux-5.4.143.bsk.7-amd64-x86_64-with-glibc2.31
- Python version: 3.9.2
- PyTorch version: 2.1.0+cu121 (GPU)
- Transformers version: 4.42.3
- Datasets version: 2.20.0
- Accelerate version: 0.32.1
- PEFT version: 0.11.1
- TRL version: 0.9.6
- GPU type: NVIDIA A100-SXM4-80GB
- DeepSpeed version: 0.8.3
Reproduction
```
Traceback (most recent call last):
  File "/home/tiger/.local/bin/llamafactory-cli", line 8, in <module>
    sys.exit(main())
  File "/mnt/bn/aigc-t2i/lifanshi/code/LLaMA-Factory/src/llamafactory/cli.py", line 111, in main
    run_exp()
  File "/mnt/bn/aigc-t2i/lifanshi/code/LLaMA-Factory/src/llamafactory/train/tuner.py", line 56, in run_exp
    run_dpo(model_args, data_args, training_args, finetuning_args, callbacks)
  File "/mnt/bn/aigc-t2i/lifanshi/code/LLaMA-Factory/src/llamafactory/train/dpo/workflow.py", line 79, in run_dpo
    train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint)
  File "/home/tiger/.local/lib/python3.9/site-packages/transformers/trainer.py", line 1932, in train
    return inner_training_loop(
  File "/home/tiger/.local/lib/python3.9/site-packages/transformers/trainer.py", line 2268, in _inner_training_loop
    tr_loss_step = self.training_step(model, inputs)
  File "/home/tiger/.local/lib/python3.9/site-packages/transformers/trainer.py", line 3307, in training_step
    loss = self.compute_loss(model, inputs)
  File "/home/tiger/.local/lib/python3.9/site-packages/trl/trainer/dpo_trainer.py", line 1408, in compute_loss
    loss, metrics = self.get_batch_loss_metrics(model, inputs, train_eval="train")
  File "/mnt/bn/aigc-t2i/lifanshi/code/LLaMA-Factory/src/llamafactory/train/dpo/trainer.py", line 229, in get_batch_loss_metrics
    losses, chosen_rewards, rejected_rewards = self.compute_preference_loss(
  File "/mnt/bn/aigc-t2i/lifanshi/code/LLaMA-Factory/src/llamafactory/train/dpo/trainer.py", line 160, in compute_preference_loss
    losses, chosen_rewards, rejected_rewards = self.dpo_loss(
  File "/home/tiger/.local/lib/python3.9/site-packages/trl/trainer/dpo_trainer.py", line 1073, in dpo_loss
    if self.f_divergence_type == FDivergenceType.ALPHA_DIVERGENCE.value:
AttributeError: 'CustomDPOTrainer' object has no attribute 'f_divergence_type'
```
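This looks like a version mismatch between this LLaMA-Factory checkout and TRL 0.9.6: `DPOTrainer.dpo_loss` in TRL 0.9.6 reads `self.f_divergence_type` (and the alpha coefficient from `self.f_divergence_params`), which are normally assigned in `DPOTrainer.__init__`, but `CustomDPOTrainer` sets its DPO attributes by hand instead of going through that initializer, so the new fields are never set. Below is a minimal sketch of a local workaround, assuming the names `FDivergenceType` and `FDivergenceConstants` from TRL 0.9.6's `trl/trainer/dpo_config.py` (verify against your installed copy); it is not the upstream fix.

```python
"""Hedged workaround sketch: pre-set the f-divergence attributes that
TRL 0.9.6's DPOTrainer.__init__ would normally assign but that
CustomDPOTrainer bypasses. Names are taken from TRL 0.9.6; verify
against your installed version."""
from trl.trainer.dpo_config import FDivergenceConstants, FDivergenceType

from llamafactory.train.dpo.trainer import CustomDPOTrainer
from llamafactory.train.tuner import run_exp

_orig_init = CustomDPOTrainer.__init__

def _patched_init(self, *args, **kwargs):
    _orig_init(self, *args, **kwargs)
    # Defaults matching TRL's DPOConfig: reverse-KL divergence with the
    # standard alpha coefficient. dpo_loss compares against the enum's
    # string value, so .value is what it expects to find here.
    self.f_divergence_type = FDivergenceType.REVERSE_KL.value
    self.f_divergence_params = {
        FDivergenceConstants.ALPHA_DIVERGENCE_COEF_KEY: 1.0
    }

CustomDPOTrainer.__init__ = _patched_init

if __name__ == "__main__":
    run_exp()  # parses the usual training arguments from the command line
```

Equivalently, the two assignments can be added directly in `CustomDPOTrainer.__init__` (src/llamafactory/train/dpo/trainer.py). Updating LLaMA-Factory to a commit where the trainer sets these fields itself, or pinning TRL to the version listed in the project's requirements, should be the cleaner fix.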
Expected behavior
DPO training should run normally.
Others
No response