Skip to content

docker container error #40

@runzeer

Description

@runzeer

set -x

MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct  # replace it with your local file path

SYSTEM_PROMPT=""""""

python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.train_files=datasets/GUI-R1/train.parquet \
    data.val_files=datasets/GUI-R1/test.parquet \
    data.system_prompt="${SYSTEM_PROMPT}" \
    worker.actor.model.model_path=${MODEL_PATH} \
    worker.rollout.tensor_parallel_size=1 \
    worker.rollout.enable_chunked_prefill=false \
    worker.reward.compute_score=r1gui \
    trainer.experiment_name=qwen2_5_vl_3b_guir1_grpo \
    trainer.n_gpus_per_node=8 \
    data.max_pixels=1258291 \
    data.max_prompt_length=2048 \
    data.max_response_length=1024 \
    data.val_batch_size=256

Traceback (most recent call last):
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/workspace/GUI-R1/verl/trainer/main.py", line 109, in <module>
    main()
  File "/workspace/GUI-R1/verl/trainer/main.py", line 105, in main
    ray.get(runner.run.remote(ppo_config))
  File "/usr/local/lib/python3.10/dist-packages/ray/_private/auto_init_hook.py", line 21, in auto_init_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/ray/_private/worker.py", line 2755, in get
    values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout)
  File "/usr/local/lib/python3.10/dist-packages/ray/_private/worker.py", line 906, in get_objects
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(NotImplementedError): ray::Runner.run() (pid=8822, ip=172.17.0.2, actor_id=26a989c7b7e863bf82d8bd9701000000, repr=<__main__.Runner object at 0x7fba5b1021a0>)
  File "/workspace/GUI-R1/verl/trainer/main.py", line 84, in run
    trainer.init_workers()
  File "/workspace/GUI-R1/verl/trainer/ray_trainer.py", line 459, in init_workers
    self.ref_policy_wg.init_model()
  File "/workspace/GUI-R1/verl/single_controller/ray/base.py", line 47, in func
    output = ray.get(output)
ray.exceptions.RayTaskError(NotImplementedError): ray::WorkerDict.ref_init_model() (pid=9477, ip=172.17.0.2, actor_id=862252fcfa0a702017bb208d01000000, repr=<verl.single_controller.ray.base.WorkerDict object at 0x7fc6b4d26b60>)
  File "/workspace/GUI-R1/verl/single_controller/ray/base.py", line 432, in func
    return getattr(self.worker_dict[key], name)(*args, **kwargs)
  File "/workspace/GUI-R1/verl/single_controller/base/decorator.py", line 207, in inner
    return func(*args, **kwargs)
  File "/workspace/GUI-R1/verl/workers/fsdp_workers.py", line 349, in init_model
    self._build_model_optimizer(
  File "/workspace/GUI-R1/verl/workers/fsdp_workers.py", line 195, in _build_model_optimizer
    model = auto_class.from_pretrained(
  File "/usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py", line 564, in from_pretrained
    return model_class.from_pretrained(
  File "/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py", line 262, in _wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py", line 4397, in from_pretrained
    dispatch_model(model, **device_map_kwargs)
  File "/usr/local/lib/python3.10/dist-packages/accelerate/big_modeling.py", line 496, in dispatch_model
    model.to(device)
  File "/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py", line 3162, in to
    return super().to(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1340, in to
    return self._apply(convert)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 900, in _apply
    module._apply(fn)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 900, in _apply
    module._apply(fn)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 900, in _apply
    module._apply(fn)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 927, in _apply
    param_applied = fn(param)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1333, in convert
    raise NotImplementedError(
NotImplementedError: Cannot copy out of meta tensor; no data! Please use torch.nn.Module.to_empty() instead of torch.nn.Module.to() when moving module from meta to a different device.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions