---
title: Computer-Use Agent Loop | Lightcone
description: Build a multi-turn agent loop using the Responses API where you control every step.
---

A computer-use agent (CUA) loop is a pattern where you repeatedly send screenshots to a vision model, it decides the next action, and you execute it. This guide shows how to build a complete CUA loop using the [Responses API](/guides/responses-api/index.md).

**Prerequisites**: Understand [Computers](/guides/computers/index.md) (sessions and actions) and the [Responses API](/guides/responses-api/index.md) (request/response format).

## The full loop

cua_loop.py

```python
from tzafon import Lightcone

client = Lightcone()

# 1. Create a browser session
with client.computer.create(kind="browser") as computer:
    # 2. Take an initial screenshot
    screenshot = computer.screenshot()
    screenshot_url = computer.get_screenshot_url(screenshot)

    # 3. Send the first request — use array input to include the screenshot
    response = client.responses.create(
        model="tzafon.northstar-cua-fast",
        input=[
            {
                "role": "user",
                "content": [
                    {"type": "input_text", "text": "Search Wikipedia for 'Alan Turing'"},
                    {"type": "input_image", "image_url": screenshot_url},
                ],
            },
        ],
        tools=[{
            "type": "computer_use",
            "display_width": 1280,
            "display_height": 720,
            "environment": "browser",
        }],
    )

    # 4. Loop until the model stops requesting actions
    while True:
        # Find computer_call items in the output
        computer_call = None
        for item in response.output or []:
            if item.type == "computer_call":
                computer_call = item
            elif item.type == "message":
                for block in item.content or []:
                    print(f"Agent says: {block.text}")

        if not computer_call:
            print("Agent is done.")
            break

        # 5. Execute the action
        action = computer_call.action
        print(f"Executing: {action.type}")

        if action.type in ("click", "double_click", "triple_click", "right_click"):
            computer.click(action.x, action.y)
        elif action.type == "type":
            computer.type(action.text)
        elif action.type in ("key", "keypress"):
            computer.hotkey(*action.keys)
        elif action.type == "scroll":
            computer.scroll(action.scroll_x or 0, action.scroll_y or 0, action.x or 640, action.y or 400)
        elif action.type == "navigate":
            computer.navigate(action.url)
        elif action.type == "wait":
            computer.wait(2)
        elif action.type in ("terminate", "done", "answer"):
            print(f"Result: {action.result or action.text}")
            break

        computer.wait(1)

        # 6. Take a new screenshot and feed it back
        screenshot = computer.screenshot()
        screenshot_url = computer.get_screenshot_url(screenshot)
        response = client.responses.create(
            model="tzafon.northstar-cua-fast",
            previous_response_id=response.id,
            input=[{
                "type": "computer_call_output",
                "call_id": computer_call.call_id,
                "output": {"type": "input_image", "image_url": screenshot_url},
            }],
            tools=[{
                "type": "computer_use",
                "display_width": 1280,
                "display_height": 720,
                "environment": "browser",
            }],
        )
```

cua_loop.ts

```typescript
import Lightcone from "@tzafon/lightcone";

const client = new Lightcone();
const computer = await client.computers.create({ kind: "browser" });
const id = computer.id!;

try {
  // Take initial screenshot
  const initialScreenshot = await client.computers.screenshot(id);
  const screenshotUrl = initialScreenshot.result?.screenshot_url as string;

  // First request — use array input to include the screenshot
  let response = await client.responses.create({
    model: "tzafon.northstar-cua-fast",
    input: [
      {
        role: "user",
        content: [
          { type: "input_text", text: "Search Wikipedia for 'Alan Turing'" },
          { type: "input_image", image_url: screenshotUrl },
        ],
      },
    ],
    tools: [{
      type: "computer_use",
      display_width: 1280,
      display_height: 720,
      environment: "browser",
    }],
  });

  // Loop until done. The label lets terminal actions exit the whole loop
  // from inside the switch (a bare `break` would only exit the switch).
  loop: while (true) {
    const computerCall = response.output?.find((item) => item.type === "computer_call");
    const message = response.output?.find((item) => item.type === "message");

    if (message) {
      for (const block of message.content ?? []) {
        console.log(`Agent says: ${block.text}`);
      }
    }

    if (!computerCall) {
      console.log("Agent is done.");
      break;
    }

    const action = computerCall.action!;
    console.log(`Executing: ${action.type}`);

    // Execute the action
    switch (action.type) {
      case "click":
      case "double_click":
      case "triple_click":
      case "right_click":
        await client.computers.click(id, { x: action.x!, y: action.y! });
        break;
      case "type":
        await client.computers.type(id, { text: action.text! });
        break;
      case "key":
      case "keypress":
        await client.computers.hotkey(id, { keys: action.keys! });
        break;
      case "scroll":
        await client.computers.scroll(id, {
          dx: action.scroll_x ?? 0,
          dy: action.scroll_y ?? 0,
          x: action.x ?? 640,
          y: action.y ?? 400,
        });
        break;
      case "navigate":
        await client.computers.navigate(id, { url: action.url! });
        break;
      case "wait":
        // Pause briefly before the next screenshot (mirrors the Python example)
        await new Promise((r) => setTimeout(r, 2000));
        break;
      case "terminate":
      case "done":
      case "answer":
        console.log(`Result: ${action.result ?? action.text}`);
        break loop; // exit the agent loop, not just the switch
    }

    // Screenshot and feed back
    await new Promise((r) => setTimeout(r, 1000));
    const newScreenshot = await client.computers.screenshot(id);
    const newUrl = newScreenshot.result?.screenshot_url as string;
    response = await client.responses.create({
      model: "tzafon.northstar-cua-fast",
      previous_response_id: response.id!,
      input: [{
        type: "computer_call_output",
        call_id: computerCall.call_id!,
        output: { type: "input_image", image_url: newUrl },
      }],
      tools: [{
        type: "computer_use",
        display_width: 1280,
        display_height: 720,
        environment: "browser",
      }],
    });
  }
} finally {
  await client.computers.delete(id);
}
```

## How it works

```
┌─────────────┐    instruction + screenshot    ┌─────────────┐
│             │ ─────────────────────────────> │             │
│  Your Code  │                                │    Model    │
│             │ <───────────────────────────── │             │
└─────────────┘    computer_call (action)      └─────────────┘
      │  ^                                           │
      │  │ execute action                            │
      v  │                                           │
┌─────────────┐                                      │
│  Computer   │      screenshot of new state         │
│   Session   │ ────────────────────────────────────>│
└─────────────┘      (as computer_call_output)
```

Each iteration:

1. The model looks at the screenshot and decides the next action
2. You execute the action on the computer session
3. You take a screenshot of the result
4. You send it back as `computer_call_output` with `previous_response_id`
5. Repeat until the model sends a `message` or `done` action

## When to use this vs. Agent Tasks

|                   | CUA Loop (Responses API)          | Agent Tasks             |
| ----------------- | --------------------------------- | ----------------------- |
| **Control**       | You control every step            | Fully autonomous        |
| **Customization** | Add custom logic between steps    | Limited to instructions |
| **Observability** | Full visibility into every action | Stream events           |
| **Complexity**    | More code to write                | One API call            |
| **Best for**      | Custom workflows, hybrid agents   | Simple end-to-end tasks |

Start with [Agent Tasks](/guides/agent-tasks/index.md) for simplicity. Move to the CUA loop when you need custom logic between steps — like validating results, branching on conditions, or integrating with other systems.

## See also

- [**Responses API**](/guides/responses-api/index.md) — reference for creating responses, action types, and multi-turn chaining
- [**Run an agent**](/guides/run-an-agent/index.md) — simpler alternative using Agent Tasks
- [**Automate a browser**](/guides/automate-a-browser/index.md) — direct browser control without an AI model