Skip to content

if isinstance(videos, (list, tuple)) and isinstance(videos[0], (list, tuple)) and is_valid_image(videos[0][0]): IndexError: list index out of range #7687

@131413ljk

Description

@131413ljk

Reminder

  • I have read the above rules and searched the existing issues.

System Info

{
"processor_class": "Qwen2_5_VLProcessor"
}

[INFO|2025-04-11 21:43:18] llamafactory.data.template:157 >> Add <|im_end|> to stop words.
[INFO|2025-04-11 21:43:18] llamafactory.data.loader:157 >> Loading dataset meld_train.json...
Running tokenizer on dataset (num_proc=16): 0%| | 0/6991 [02:47<?, ? examples/s]
multiprocess.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/multiprocess/pool.py", line 125, in worker
result = (True, func(*args, **kwds))
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/datasets/utils/py_utils.py", line 678, in _write_generator_to_queue
for i, result in enumerate(func(**kwargs)):
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/datasets/arrow_dataset.py", line 3476, in _map_single
batch = apply_function_on_filtered_inputs(
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/datasets/arrow_dataset.py", line 3338, in apply_function_on_filtered_inputs
processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
File "/home/ljk/LLaMA-Factory-main/src/llamafactory/data/processor/supervised.py", line 99, in preprocess_dataset
input_ids, labels = self._encode_data_example(
File "/home/ljk/LLaMA-Factory-main/src/llamafactory/data/processor/supervised.py", line 43, in _encode_data_example
messages = self.template.mm_plugin.process_messages(prompt + response, images, videos, audios, self.processor)
File "/home/ljk/LLaMA-Factory-main/src/llamafactory/data/mm_plugin.py", line 1112, in process_messages
mm_inputs = self._get_mm_inputs(images, videos, audios, processor)
File "/home/ljk/LLaMA-Factory-main/src/llamafactory/data/mm_plugin.py", line 1094, in _get_mm_inputs
mm_inputs.update(image_processor(images=None, videos=videos, return_tensors="pt"))
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/transformers/image_processing_utils.py", line 42, in __call__
return self.preprocess(images, **kwargs)
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 354, in preprocess
videos = make_batched_videos(videos)
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/transformers/image_utils.py", line 322, in make_batched_videos
if isinstance(videos, (list, tuple)) and isinstance(videos[0], (list, tuple)) and is_valid_image(videos[0][0]):
IndexError: list index out of range
"""

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
File "/home/ljk/anaconda3/envs/py_39/bin/llamafactory-cli", line 8, in <module>
sys.exit(main())
File "/home/ljk/LLaMA-Factory-main/src/llamafactory/cli.py", line 112, in main
run_exp()
File "/home/ljk/LLaMA-Factory-main/src/llamafactory/train/tuner.py", line 93, in run_exp
_training_function(config={"args": args, "callbacks": callbacks})
File "/home/ljk/LLaMA-Factory-main/src/llamafactory/train/tuner.py", line 67, in _training_function
run_sft(model_args, data_args, training_args, finetuning_args, generating_args, callbacks)
File "/home/ljk/LLaMA-Factory-main/src/llamafactory/train/sft/workflow.py", line 51, in run_sft
dataset_module = get_dataset(template, model_args, data_args, training_args, stage="sft", **tokenizer_module)
File "/home/ljk/LLaMA-Factory-main/src/llamafactory/data/loader.py", line 325, in get_dataset
dataset = _get_preprocessed_dataset(
File "/home/ljk/LLaMA-Factory-main/src/llamafactory/data/loader.py", line 258, in _get_preprocessed_dataset
dataset = dataset.map(
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/datasets/arrow_dataset.py", line 560, in wrapper
out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/datasets/arrow_dataset.py", line 3165, in map
for rank, done, content in iflatmap_unordered(
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/datasets/utils/py_utils.py", line 718, in iflatmap_unordered
[async_result.get(timeout=0.05) for async_result in async_results]
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/datasets/utils/py_utils.py", line 718, in <listcomp>
[async_result.get(timeout=0.05) for async_result in async_results]
File "/home/ljk/anaconda3/envs/py_39/lib/python3.9/site-packages/multiprocess/pool.py", line 771, in get
raise self._value
IndexError: list index out of range
Browse command:

Reproduction

w'h'ywhy

Others

No response

Metadata

Metadata

Assignees

No one assigned

    Labels

    solved: This problem has already been solved

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions