"""Query a local OpenAI-compatible server with one image and a planning prompt.

The script base64-encodes the image given by ``--image_path``, sends it with a
fixed task-planning prompt to the first model listed by the server at
``http://127.0.0.1:<port>/v1``, and prints the text of the first choice.
"""

import argparse
import base64
import mimetypes

from openai import OpenAI
from vllm import LLM  # NOTE(review): not referenced in the visible script; kept as-is.

parse = argparse.ArgumentParser()
parse.add_argument(
    "--image_path",
    type=str,
    default="./doc/planning.png",
    help="Path of the image file sent together with the prompt.",
)
parse.add_argument(
    "--port",
    type=int,
    default=8000,
    help="Port of the OpenAI-compatible server on 127.0.0.1.",
)
args = parse.parse_args()

# NOTE(review): the spelling "obiects" is reproduced verbatim from the original
# prompt; confirm whether it is intentional before correcting it, since
# changing the prompt text may change the model's output.
PROMPT = (
    "Given the obiects in the image, if you are required to complete the task "
    "\"Put the apple in the basket\", what is your detailed plan? "
    "Write your plan and explain it in detail, using the following format: "
    "Step_1: xxx\nStep_2: xxx\n ...\nStep_n: xxx\n"
)


def _image_to_data_url(image_path):
    """Return the file at *image_path* encoded as a base64 ``data:`` URL.

    The media type is guessed from the file extension. When no ``image/*``
    type can be guessed, the generic ``image`` prefix is used instead.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image"

    with open(image_path, "rb") as f:
        encoded_image = base64.b64encode(f.read()).decode("utf-8")
    return f"data:{mime_type};base64,{encoded_image}"


def main():
    """Send the image and prompt to the server and print the model's reply.

    Raises:
        RuntimeError: If the server reports no available models.
    """
    # presumably this key must match the one the server was started with
    # -- TODO confirm against the server launch command.
    openai_api_key = "robobrain-123123"
    openai_api_base = f"http://127.0.0.1:{args.port}/v1"

    client = OpenAI(
        api_key=openai_api_key,
        base_url=openai_api_base,
    )

    base64_img = _image_to_data_url(args.image_path)

    # Use whichever model the server lists first; fail with a clear message
    # instead of an opaque IndexError when the list is empty.
    models = client.models.list()
    if not models.data:
        raise RuntimeError(f"No models are available at {openai_api_base}")
    model = models.data[0].id

    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "image_url", "image_url": {"url": base64_img}},
                    {"type": "text", "text": PROMPT},
                ],
            },
        ],
        temperature=0.1,
    )

    content = response.choices[0].message.content
    print(content)


if __name__ == "__main__":
    main()

'''
Prediction: (as an example)
    Step_1: Move to the apple. Move towards the apple on the table.
    Step_2: Pick up the apple. Grab the apple and lift it off the table.
    Step_3: Move towards the basket. Move the apple towards the basket without dropping it.
    Step_4: Put the apple in the basket. Place the apple inside the basket, ensuring it is in a stable position.
'''