Skip to content
Northstar · Platform · Pricing · Login
Using Northstar

Computer-Use Loop

Build a multi-turn loop using the Responses API where you control every step.

A computer-use loop is a pattern where you repeatedly send screenshots to Northstar, the model decides the next action, and you execute it on a cloud computer. This guide shows how to build a complete loop using the Responses API.

Prerequisites: Understand Computers (lifecycle and actions) and the Responses API (request/response format).

cua_loop.py
from tzafon import Lightcone

client = Lightcone()

# 1. Spin up a cloud computer. The context manager guarantees the machine
#    is released even if the loop raises.
with client.computer.create(kind="desktop") as computer:
    # 2. Take an initial screenshot
    screenshot = computer.screenshot()
    screenshot_url = computer.get_screenshot_url(screenshot)

    # 3. Send the first request — use array input to include the screenshot
    response = client.responses.create(
        model="tzafon.northstar-cua-fast",
        input=[
            {
                "role": "user",
                "content": [
                    {"type": "input_text", "text": "Open the terminal and run 'uname -a'"},
                    {"type": "input_image", "image_url": screenshot_url, "detail": "auto"},
                ],
            },
        ],
        tools=[{
            "type": "computer_use",
            "display_width": 1280,
            "display_height": 720,
            "environment": "desktop",
        }],
    )

    # 4. Loop until Northstar stops requesting actions
    while True:
        # Find computer_call items in the output and surface any messages.
        computer_call = None
        for item in response.output or []:
            if item.type == "computer_call":
                computer_call = item
            elif item.type == "message":
                for block in item.content or []:
                    # Not every content block necessarily carries text;
                    # guard instead of assuming `.text` exists.
                    text = getattr(block, "text", None)
                    if text:
                        print(f"Northstar says: {text}")

        if not computer_call:
            print("Done.")
            break

        # 5. Execute the action
        action = computer_call.action
        print(f"Executing: {action.type}")

        # Right-clicks arrive as a "click" action with button == "right".
        if action.type == "click" and getattr(action, "button", "left") == "right":
            computer.right_click(action.x, action.y)
        elif action.type == "click":
            computer.click(action.x, action.y)
        elif action.type == "double_click":
            computer.double_click(action.x, action.y)
        elif action.type == "type":
            computer.type(action.text)
        elif action.type in ("key", "keypress"):
            computer.hotkey(*action.keys)
        elif action.type == "scroll":
            # Fall back to the screen centre (1280x720 display) when the
            # model omits coordinates.
            computer.scroll(0, action.scroll_y or 0, action.x or 640, action.y or 400)
        elif action.type == "hscroll":
            computer.scroll(action.scroll_x or 0, 0, action.x or 640, action.y or 400)
        elif action.type == "drag":
            computer.drag(action.x, action.y, action.end_x, action.end_y)
        elif action.type == "navigate":
            computer.navigate(action.url)
        elif action.type == "wait":
            computer.wait(2)
        elif action.type == "terminate":
            # Terminal actions end the loop immediately.
            print(f"{action.status}: {action.result}")
            break
        elif action.type == "answer":
            print(f"Answer: {action.result}")
            break
        elif action.type == "done":
            print(f"Done: {action.text}")
            break

        # Give the UI a moment to settle before capturing the result.
        computer.wait(1)

        # 6. Take a new screenshot and feed it back.
        # previous_response_id tells the server to include the full prior
        # conversation (including the model's own output), so we only send
        # what's new: the screenshot after the action.
        screenshot = computer.screenshot()
        screenshot_url = computer.get_screenshot_url(screenshot)
        response = client.responses.create(
            model="tzafon.northstar-cua-fast",
            previous_response_id=response.id,
            input=[{
                "type": "computer_call_output",
                "call_id": computer_call.call_id,
                "output": {"type": "input_image", "image_url": screenshot_url, "detail": "auto"},
            }],
            tools=[{
                "type": "computer_use",
                "display_width": 1280,
                "display_height": 720,
                "environment": "desktop",
            }],
        )
cua_loop.ts
import Lightcone from "@tzafon/lightcone";

const client = new Lightcone();

const computer = await client.computers.create({ kind: "desktop" });
const id = computer.id!;

try {
  // Take initial screenshot
  const initialScreenshot = await client.computers.screenshot(id);
  const screenshotUrl = initialScreenshot.result?.screenshot_url as string;

  // First request — use array input to include the screenshot
  let response = await client.responses.create({
    model: "tzafon.northstar-cua-fast",
    input: [
      {
        role: "user",
        content: [
          { type: "input_text", text: "Open the terminal and run 'uname -a'" },
          { type: "input_image", image_url: screenshotUrl, detail: "auto" },
        ],
      },
    ],
    tools: [{
      type: "computer_use",
      display_width: 1280,
      display_height: 720,
      environment: "desktop",
    }],
  });

  // Loop until done
  while (true) {
    const computerCall = response.output?.find((item) => item.type === "computer_call");

    // Print every assistant message (matches the Python sample, which
    // surfaces all messages rather than only the first).
    for (const item of response.output ?? []) {
      if (item.type === "message") {
        for (const block of item.content ?? []) {
          console.log(`Northstar says: ${block.text}`);
        }
      }
    }

    if (!computerCall) {
      console.log("Done.");
      break;
    }

    const action = computerCall.action!;
    console.log(`Executing: ${action.type}`);

    // Fix: terminate/answer/done previously only broke out of the switch,
    // so the while-loop kept screenshotting after a terminal action.
    // Track terminal actions with a flag and exit the loop afterwards.
    let finished = false;

    // Execute the action — right-clicks come as click with button: "right"
    if (action.type === "click" && action.button === "right") {
      await client.computers.rightClick(id, { x: action.x!, y: action.y! });
    } else switch (action.type) {
      case "click":
        await client.computers.click(id, { x: action.x!, y: action.y! });
        break;
      case "double_click":
        await client.computers.doubleClick(id, { x: action.x!, y: action.y! });
        break;
      case "type":
        await client.computers.type(id, { text: action.text! });
        break;
      case "key":
      case "keypress":
        await client.computers.hotkey(id, { keys: action.keys! });
        break;
      case "scroll":
        // Default to the centre of the 1280x720 display when coordinates
        // are omitted.
        await client.computers.scroll(id, {
          dx: 0, dy: action.scroll_y ?? 0,
          x: action.x ?? 640, y: action.y ?? 400,
        });
        break;
      case "hscroll":
        await client.computers.scroll(id, {
          dx: action.scroll_x ?? 0, dy: 0,
          x: action.x ?? 640, y: action.y ?? 400,
        });
        break;
      case "drag":
        await client.computers.drag(id, {
          x1: action.x!, y1: action.y!,
          x2: action.end_x!, y2: action.end_y!,
        });
        break;
      case "navigate":
        await client.computers.navigate(id, { url: action.url! });
        break;
      case "terminate":
        console.log(`${action.status}: ${action.result}`);
        finished = true;
        break;
      case "answer":
        console.log(`Answer: ${action.result}`);
        finished = true;
        break;
      case "done":
        console.log(`Done: ${action.text}`);
        finished = true;
        break;
    }

    if (finished) break;

    // Screenshot and feed back.
    // previous_response_id tells the server to include the full prior
    // conversation (including the model's own output), so we only send
    // what's new: the screenshot after the action.
    await new Promise((r) => setTimeout(r, 1000));
    const newScreenshot = await client.computers.screenshot(id);
    const newUrl = newScreenshot.result?.screenshot_url as string;
    response = await client.responses.create({
      model: "tzafon.northstar-cua-fast",
      previous_response_id: response.id!,
      input: [{
        type: "computer_call_output",
        call_id: computerCall.call_id!,
        output: { type: "input_image", image_url: newUrl, detail: "auto" },
      }],
      tools: [{
        type: "computer_use",
        display_width: 1280,
        display_height: 720,
        environment: "desktop",
      }],
    });
  }
} finally {
  await client.computers.delete(id);
}
┌─────────────┐ instruction + screenshot ┌─────────────┐
│ │ ──────────────────────────────── > │ │
│ Your Code │ │ Northstar │
│ │ < ──────────────────────────────── │ │
└─────────────┘ computer_call (action) └─────────────┘
│ ^
│ execute action │
v │
┌─────────────┐ │
│ Lightcone │ screenshot of new state │
│ OS │ ─────────────────────────────────────────>│
└─────────────┘ (as computer_call_output)

Each iteration:

  1. Northstar looks at the screenshot and decides the next action
  2. You execute the action on the computer
  3. You take a screenshot of the result
  4. You send it back as computer_call_output with previous_response_id
  5. Repeat until the model sends a message or done action

When you pass previous_response_id, the server automatically prepends the full prior conversation — including the model’s own output (its messages and actions) — to your new input. You only need to send what’s new: the computer_call_output with the screenshot taken after executing the action.

|               | Computer-use loop (Responses API) | Tasks                       |
|---------------|-----------------------------------|-----------------------------|
| Control       | You control every step            | Fully managed by Northstar  |
| Customization | Add custom logic between steps    | Limited to instructions     |
| Observability | Full visibility into every action | Stream events               |
| Complexity    | More code to write                | One API call                |
| Best for      | Custom workflows, hybrid systems  | Simple end-to-end work      |
  • Extracting information — how to get structured data out of a page after navigating to it (the two-phase pattern)
  • Responses API — reference for creating responses, action types, and multi-turn chaining
  • Run a task — simpler alternative using Tasks
  • Coordinates — how Northstar’s coordinate system works and how to scale coordinates
  • Operate a computer — direct computer control without an AI model
  • simple.py — minimal CUA loop example