Computer-Use Loop
Build a multi-turn loop using the Responses API where you control every step.
A computer-use loop is a pattern where you repeatedly send screenshots to Northstar, the model decides the next action, and you execute it on a cloud computer. This guide shows how to build a complete loop using the Responses API.
Prerequisites: Understand Computers (lifecycle and actions) and the Responses API (request/response format).
The full loop
Section titled “The full loop”
from tzafon import Lightcone
client = Lightcone()
# 1. Spin up a cloud computerwith client.computer.create(kind="desktop") as computer: # 2. Take an initial screenshot screenshot = computer.screenshot() screenshot_url = computer.get_screenshot_url(screenshot)
# 3. Send the first request — use array input to include the screenshot response = client.responses.create( model="tzafon.northstar-cua-fast", input=[ { "role": "user", "content": [ {"type": "input_text", "text": "Open the terminal and run 'uname -a'"}, {"type": "input_image", "image_url": screenshot_url, "detail": "auto"}, ], }, ], tools=[{ "type": "computer_use", "display_width": 1280, "display_height": 720, "environment": "desktop", }], )
# 4. Loop until Northstar stops requesting actions while True: # Find computer_call items in the output computer_call = None for item in response.output or []: if item.type == "computer_call": computer_call = item elif item.type == "message": for block in item.content or []: print(f"Northstar says: {block.text}")
if not computer_call: print("Done.") break
# 5. Execute the action action = computer_call.action print(f"Executing: {action.type}")
if action.type == "click" and getattr(action, "button", "left") == "right": computer.right_click(action.x, action.y) elif action.type == "click": computer.click(action.x, action.y) elif action.type == "double_click": computer.double_click(action.x, action.y) elif action.type == "type": computer.type(action.text) elif action.type in ("key", "keypress"): computer.hotkey(*action.keys) elif action.type == "scroll": computer.scroll(0, action.scroll_y or 0, action.x or 640, action.y or 400) elif action.type == "hscroll": computer.scroll(action.scroll_x or 0, 0, action.x or 640, action.y or 400) elif action.type == "drag": computer.drag(action.x, action.y, action.end_x, action.end_y) elif action.type == "navigate": computer.navigate(action.url) elif action.type == "wait": computer.wait(2) elif action.type == "terminate": print(f"{action.status}: {action.result}") break elif action.type == "answer": print(f"Answer: {action.result}") break elif action.type == "done": print(f"Done: {action.text}") break
computer.wait(1)
# 6. Take a new screenshot and feed it back. # previous_response_id tells the server to include the full prior # conversation (including the model's own output), so we only send # what's new: the screenshot after the action. screenshot = computer.screenshot() screenshot_url = computer.get_screenshot_url(screenshot)
response = client.responses.create( model="tzafon.northstar-cua-fast", previous_response_id=response.id, input=[{ "type": "computer_call_output", "call_id": computer_call.call_id, "output": {"type": "input_image", "image_url": screenshot_url, "detail": "auto"}, }], tools=[{ "type": "computer_use", "display_width": 1280, "display_height": 720, "environment": "desktop", }], )import Lightcone from "@tzafon/lightcone";
const client = new Lightcone();
const computer = await client.computers.create({ kind: "desktop" });
const id = computer.id!;

try {
  // Take initial screenshot
  const initialScreenshot = await client.computers.screenshot(id);
  const screenshotUrl = initialScreenshot.result?.screenshot_url as string;

  // First request — use array input to include the screenshot
  let response = await client.responses.create({
    model: "tzafon.northstar-cua-fast",
    input: [
      {
        role: "user",
        content: [
          { type: "input_text", text: "Open the terminal and run 'uname -a'" },
          { type: "input_image", image_url: screenshotUrl, detail: "auto" },
        ],
      },
    ],
    tools: [{
      type: "computer_use",
      display_width: 1280,
      display_height: 720,
      environment: "desktop",
    }],
  });

  // Loop until done
  while (true) {
    const computerCall = response.output?.find((item) => item.type === "computer_call");
    const message = response.output?.find((item) => item.type === "message");

    if (message) {
      for (const block of message.content ?? []) {
        console.log(`Northstar says: ${block.text}`);
      }
    }

    if (!computerCall) {
      console.log("Done.");
      break;
    }

    const action = computerCall.action!;
    console.log(`Executing: ${action.type}`);

    // A `break` inside `switch` only leaves the switch, so terminal actions
    // set this flag and the loop is exited explicitly afterwards.
    let finished = false;

    // Execute the action — right-clicks come as click with button: "right"
    if (action.type === "click" && action.button === "right") {
      await client.computers.rightClick(id, { x: action.x!, y: action.y! });
    } else switch (action.type) {
      case "click":
        await client.computers.click(id, { x: action.x!, y: action.y! });
        break;
      case "double_click":
        await client.computers.doubleClick(id, { x: action.x!, y: action.y! });
        break;
      case "type":
        await client.computers.type(id, { text: action.text! });
        break;
      case "key":
      case "keypress":
        await client.computers.hotkey(id, { keys: action.keys! });
        break;
      case "scroll":
        await client.computers.scroll(id, {
          dx: 0,
          dy: action.scroll_y ?? 0,
          x: action.x ?? 640,
          y: action.y ?? 400,
        });
        break;
      case "hscroll":
        await client.computers.scroll(id, {
          dx: action.scroll_x ?? 0,
          dy: 0,
          x: action.x ?? 640,
          y: action.y ?? 400,
        });
        break;
      case "drag":
        await client.computers.drag(id, {
          x1: action.x!,
          y1: action.y!,
          x2: action.end_x!,
          y2: action.end_y!,
        });
        break;
      case "navigate":
        await client.computers.navigate(id, { url: action.url! });
        break;
      case "wait":
        // The model asked to pause; sleep client-side for two seconds.
        await new Promise((r) => setTimeout(r, 2000));
        break;
      case "terminate":
        console.log(`${action.status}: ${action.result}`);
        finished = true;
        break;
      case "answer":
        console.log(`Answer: ${action.result}`);
        finished = true;
        break;
      case "done":
        console.log(`Done: ${action.text}`);
        finished = true;
        break;
    }

    // Terminal action: stop here instead of sending another screenshot.
    if (finished) {
      break;
    }

    // Screenshot and feed back.
    // previous_response_id tells the server to include the full prior
    // conversation (including the model's own output), so we only send
    // what's new: the screenshot after the action.
    await new Promise((r) => setTimeout(r, 1000));
    const newScreenshot = await client.computers.screenshot(id);
    const newUrl = newScreenshot.result?.screenshot_url as string;

    response = await client.responses.create({
      model: "tzafon.northstar-cua-fast",
      previous_response_id: response.id!,
      input: [{
        type: "computer_call_output",
        call_id: computerCall.call_id!,
        output: { type: "input_image", image_url: newUrl, detail: "auto" },
      }],
      tools: [{
        type: "computer_use",
        display_width: 1280,
        display_height: 720,
        environment: "desktop",
      }],
    });
  }
} finally {
  // Always release the cloud computer, even if the loop throws.
  await client.computers.delete(id);
}
How it works
Section titled “How it works”
┌─────────────┐      instruction + screenshot      ┌─────────────┐
│             │ ──────────────────────────────── > │             │
│  Your Code  │                                    │  Northstar  │
│             │ < ──────────────────────────────── │             │
└─────────────┘       computer_call (action)       └─────────────┘
       │                                                  ^
       │ execute action                                   │
       v                                                  │
┌─────────────┐                                           │
│  Lightcone  │          screenshot of new state          │
│     OS      │ ─────────────────────────────────────────>│
└─────────────┘         (as computer_call_output)
Each iteration:
- Northstar looks at the screenshot and decides the next action
- You execute the action on the computer
- You take a screenshot of the result
- You send it back as computer_call_output with previous_response_id
- Repeat until the model sends a message or done action
When you pass previous_response_id, the server automatically prepends the full prior conversation — including the model’s own output (its messages and actions) — to your new input. You only need to send what’s new: the computer_call_output with the screenshot taken after executing the action.
When to use this vs. Tasks
Section titled “When to use this vs. Tasks”
| | Computer-use loop (Responses API) | Tasks |
|---|---|---|
| Control | You control every step | Fully managed by Northstar |
| Customization | Add custom logic between steps | Limited to instructions |
| Observability | Full visibility into every action | Stream events |
| Complexity | More code to write | One API call |
| Best for | Custom workflows, hybrid systems | Simple end-to-end work |
See also
Section titled “See also”
- Extracting information — how to get structured data out of a page after navigating to it (the two-phase pattern)
- Responses API — reference for creating responses, action types, and multi-turn chaining
- Run a task — simpler alternative using Tasks
- Coordinates — how Northstar’s coordinate system works and how to scale coordinates
- Operate a computer — direct computer control without an AI model
- simple.py — minimal CUA loop example