Reminder
System Info
{
"processor_class": "Qwen2_5_VLProcessor"
}
[INFO|2025-04-11 21:43:18] llamafactory.data.template:157 >> Add <|im_end|> to stop words.
[INFO|2025-04-11 21:43:18] llamafactory.data.loader:157 >> Loading dataset meld_train.json...
Running tokenizer on dataset (num_proc=16): 0%| | 0/6991 [02:47<?, ? examples/s]
multiprocess.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/multiprocess/pool.py", line 125, in worker
result = (True, func(*args, **kwds))
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/datasets/utils/py_utils.py", line 678, in _write_generator_to_queue
for i, result in enumerate(func(**kwargs)):
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/datasets/arrow_dataset.py", line 3476, in _map_single
batch = apply_function_on_filtered_inputs(
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/datasets/arrow_dataset.py", line 3338, in apply_function_on_filtered_inputs
processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
File "/home/ljk/LLaMA-Factory-main/src/llamafactory/data/processor/supervised.py", line 99, in preprocess_dataset
input_ids, labels = self._encode_data_example(
File "/home/ljk/LLaMA-Factory-main/src/llamafactory/data/processor/supervised.py", line 43, in _encode_data_example
messages = self.template.mm_plugin.process_messages(prompt + response, images, videos, audios, self.processor)
File "/home/ljk/LLaMA-Factory-main/src/llamafactory/data/mm_plugin.py", line 1112, in process_messages
mm_inputs = self._get_mm_inputs(images, videos, audios, processor)
File "/home/ljk/LLaMA-Factory-main/src/llamafactory/data/mm_plugin.py", line 1094, in _get_mm_inputs
mm_inputs.update(image_processor(images=None, videos=videos, return_tensors="pt"))
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/transformers/image_processing_utils.py", line 42, in __call__
return self.preprocess(images, **kwargs)
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 354, in preprocess
videos = make_batched_videos(videos)
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/transformers/image_utils.py", line 322, in make_batched_videos
if isinstance(videos, (list, tuple)) and isinstance(videos[0], (list, tuple)) and is_valid_image(videos[0][0]):
IndexError: list index out of range
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/ljk/anaconda3/envs/py_39/bin/llamafactory-cli", line 8, in <module>
sys.exit(main())
File "/home/ljk/LLaMA-Factory-main/src/llamafactory/cli.py", line 112, in main
run_exp()
File "/home/ljk/LLaMA-Factory-main/src/llamafactory/train/tuner.py", line 93, in run_exp
_training_function(config={"args": args, "callbacks": callbacks})
File "/home/ljk/LLaMA-Factory-main/src/llamafactory/train/tuner.py", line 67, in _training_function
run_sft(model_args, data_args, training_args, finetuning_args, generating_args, callbacks)
File "/home/ljk/LLaMA-Factory-main/src/llamafactory/train/sft/workflow.py", line 51, in run_sft
dataset_module = get_dataset(template, model_args, data_args, training_args, stage="sft", **tokenizer_module)
File "/home/ljk/LLaMA-Factory-main/src/llamafactory/data/loader.py", line 325, in get_dataset
dataset = _get_preprocessed_dataset(
File "/home/ljk/LLaMA-Factory-main/src/llamafactory/data/loader.py", line 258, in _get_preprocessed_dataset
dataset = dataset.map(
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/datasets/arrow_dataset.py", line 560, in wrapper
out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/datasets/arrow_dataset.py", line 3165, in map
for rank, done, content in iflatmap_unordered(
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/datasets/utils/py_utils.py", line 718, in iflatmap_unordered
[async_result.get(timeout=0.05) for async_result in async_results]
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/datasets/utils/py_utils.py", line 718, in <listcomp>
[async_result.get(timeout=0.05) for async_result in async_results]
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/multiprocess/pool.py", line 771, in get
raise self._value
IndexError: list index out of range
Browse command:
Reproduction
w'h'ywhy
Others
No response
Reminder
System Info
{
"processor_class": "Qwen2_5_VLProcessor"
}
[INFO|2025-04-11 21:43:18] llamafactory.data.template:157 >> Add <|im_end|> to stop words.
[INFO|2025-04-11 21:43:18] llamafactory.data.loader:157 >> Loading dataset meld_train.json...
Running tokenizer on dataset (num_proc=16): 0%| | 0/6991 [02:47<?, ? examples/s]
multiprocess.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/multiprocess/pool.py", line 125, in worker
result = (True, func(*args, **kwds))
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/datasets/utils/py_utils.py", line 678, in _write_generator_to_queue
for i, result in enumerate(func(**kwargs)):
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/datasets/arrow_dataset.py", line 3476, in _map_single
batch = apply_function_on_filtered_inputs(
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/datasets/arrow_dataset.py", line 3338, in apply_function_on_filtered_inputs
processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
File "/home/ljk/LLaMA-Factory-main/src/llamafactory/data/processor/supervised.py", line 99, in preprocess_dataset
input_ids, labels = self._encode_data_example(
File "/home/ljk/LLaMA-Factory-main/src/llamafactory/data/processor/supervised.py", line 43, in _encode_data_example
messages = self.template.mm_plugin.process_messages(prompt + response, images, videos, audios, self.processor)
File "/home/ljk/LLaMA-Factory-main/src/llamafactory/data/mm_plugin.py", line 1112, in process_messages
mm_inputs = self._get_mm_inputs(images, videos, audios, processor)
File "/home/ljk/LLaMA-Factory-main/src/llamafactory/data/mm_plugin.py", line 1094, in _get_mm_inputs
mm_inputs.update(image_processor(images=None, videos=videos, return_tensors="pt"))
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/transformers/image_processing_utils.py", line 42, in __call__
return self.preprocess(images, **kwargs)
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 354, in preprocess
videos = make_batched_videos(videos)
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/transformers/image_utils.py", line 322, in make_batched_videos
if isinstance(videos, (list, tuple)) and isinstance(videos[0], (list, tuple)) and is_valid_image(videos[0][0]):
IndexError: list index out of range
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/ljk/anaconda3/envs/py_39/bin/llamafactory-cli", line 8, in <module>
sys.exit(main())
File "/home/ljk/LLaMA-Factory-main/src/llamafactory/cli.py", line 112, in main
run_exp()
File "/home/ljk/LLaMA-Factory-main/src/llamafactory/train/tuner.py", line 93, in run_exp
_training_function(config={"args": args, "callbacks": callbacks})
File "/home/ljk/LLaMA-Factory-main/src/llamafactory/train/tuner.py", line 67, in _training_function
run_sft(model_args, data_args, training_args, finetuning_args, generating_args, callbacks)
File "/home/ljk/LLaMA-Factory-main/src/llamafactory/train/sft/workflow.py", line 51, in run_sft
dataset_module = get_dataset(template, model_args, data_args, training_args, stage="sft", **tokenizer_module)
File "/home/ljk/LLaMA-Factory-main/src/llamafactory/data/loader.py", line 325, in get_dataset
dataset = _get_preprocessed_dataset(
File "/home/ljk/LLaMA-Factory-main/src/llamafactory/data/loader.py", line 258, in _get_preprocessed_dataset
dataset = dataset.map(
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/datasets/arrow_dataset.py", line 560, in wrapper
out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/datasets/arrow_dataset.py", line 3165, in map
for rank, done, content in iflatmap_unordered(
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/datasets/utils/py_utils.py", line 718, in iflatmap_unordered
[async_result.get(timeout=0.05) for async_result in async_results]
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/datasets/utils/py_utils.py", line 718, in <listcomp>
[async_result.get(timeout=0.05) for async_result in async_results]
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/multiprocess/pool.py", line 771, in get
raise self._value
IndexError: list index out of range
Browse command:
Reproduction
w'h'ywhy
Others
No response