axolotl/src/axolotl/integrations/kd/trainer.py, line 77 — add: if num_items_in_batch is None: num_items_in_batch = -1

/usr/local/lib/python3.10/dist-packages/trl/scripts/vllm_serve.py, line 67 — add the following after the existing line `os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"`:
```
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
import multiprocessing as mp
try:
    mp.set_start_method('spawn', force=True)
    print("spawned")
except RuntimeError:
    pass
```

"/usr/local/lib/python3.10/dist-packages/vllm/model_executor/models/qwen2.py", line 397, in load_weights — reference implementation:
```
load_weights -> https://github.com/vllm-project/vllm/blob/main/vllm/model_executor/models/qwen2.py # vllm==0.9.0
```

Related traceback frames: "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/layers/linear.py", line 220, in apply; "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/layers/vocab_parallel_embedding.py", line 57, in apply — replace the body with:
```
# return torch.matmul(x, layer.weight)
if x.shape[-1] == layer.weight.shape[-1]:
    return torch.matmul(x, layer.weight.permute(1, 0))
else:
    return torch.matmul(x, layer.weight)
```