Computer-Use Loop

Using Northstar

Build a multi-turn loop using the Responses API where you control every step.

A computer-use loop is a pattern where you repeatedly send screenshots to Northstar, the model decides the next action, and you execute it on a cloud computer. This guide shows how to build a complete loop using the Responses API.

Prerequisites: Understand Computers (lifecycle and actions) and the Responses API (request/response format).

The full loop

from tzafon import Lightcone

# The model name and tool configuration are the same for every request in the
# loop, so they are defined once instead of being repeated per call.
MODEL = "tzafon.northstar-cua-fast"
COMPUTER_USE_TOOL = {
    "type": "computer_use",
    "display_width": 1280,
    "display_height": 720,
    "environment": "desktop",
}

# Fallback pointer position for scroll actions that carry no coordinates.
DEFAULT_SCROLL_X = 640
DEFAULT_SCROLL_Y = 400

client = Lightcone()


def _coord(value, fallback):
    """Return ``value`` unless it is None, in which case return ``fallback``.

    A plain ``value or fallback`` would also replace a legitimate coordinate
    of 0 (the left/top screen edge) with the fallback.
    """
    return fallback if value is None else value


# 1. Spin up a cloud computer
with client.computer.create(kind="desktop") as computer:
    # 2. Take an initial screenshot
    screenshot = computer.screenshot()
    screenshot_url = computer.get_screenshot_url(screenshot)

    # 3. Send the first request — use array input to include the screenshot
    response = client.responses.create(
        model=MODEL,
        input=[
            {
                "role": "user",
                "content": [
                    {"type": "input_text", "text": "Open the terminal and run 'uname -a'"},
                    {"type": "input_image", "image_url": screenshot_url, "detail": "auto"},
                ],
            },
        ],
        tools=[COMPUTER_USE_TOOL],
    )

    # 4. Loop until Northstar stops requesting actions
    while True:
        # Find computer_call items in the output; print any message text.
        # If several computer_call items are present, the last one wins.
        computer_call = None
        for item in response.output or []:
            if item.type == "computer_call":
                computer_call = item
            elif item.type == "message":
                for block in item.content or []:
                    print(f"Northstar says: {block.text}")

        # No requested action means the model has finished.
        if not computer_call:
            print("Done.")
            break

        # 5. Execute the action
        action = computer_call.action
        print(f"Executing: {action.type}")

        # Right-clicks arrive as a "click" action with button == "right".
        if action.type == "click" and getattr(action, "button", "left") == "right":
            computer.right_click(action.x, action.y)
        elif action.type == "click":
            computer.click(action.x, action.y)
        elif action.type == "double_click":
            computer.double_click(action.x, action.y)
        elif action.type == "type":
            computer.type(action.text)
        elif action.type in ("key", "keypress"):
            computer.hotkey(*action.keys)
        elif action.type == "scroll":
            computer.scroll(
                0,
                action.scroll_y or 0,
                _coord(action.x, DEFAULT_SCROLL_X),
                _coord(action.y, DEFAULT_SCROLL_Y),
            )
        elif action.type == "hscroll":
            computer.scroll(
                action.scroll_x or 0,
                0,
                _coord(action.x, DEFAULT_SCROLL_X),
                _coord(action.y, DEFAULT_SCROLL_Y),
            )
        elif action.type == "drag":
            computer.drag(action.x, action.y, action.end_x, action.end_y)
        elif action.type == "navigate":
            computer.navigate(action.url)
        elif action.type == "wait":
            computer.wait(2)
        elif action.type == "terminate":
            # Terminal actions end the loop without another round trip.
            print(f"{action.status}: {action.result}")
            break
        elif action.type == "answer":
            print(f"Answer: {action.result}")
            break
        elif action.type == "done":
            print(f"Done: {action.text}")
            break
        else:
            # Surface unknown action types instead of ignoring them silently;
            # the loop still reports the (unchanged) screen back to the model.
            print(f"Unhandled action type: {action.type}")

        # Pause before capturing the screen after the action.
        computer.wait(1)

        # 6. Take a new screenshot and feed it back.
        #    previous_response_id tells the server to include the full prior
        #    conversation (including the model's own output), so we only send
        #    what's new: the screenshot after the action.
        screenshot = computer.screenshot()
        screenshot_url = computer.get_screenshot_url(screenshot)

        response = client.responses.create(
            model=MODEL,
            previous_response_id=response.id,
            input=[{
                "type": "computer_call_output",
                "call_id": computer_call.call_id,
                "output": {"type": "input_image", "image_url": screenshot_url, "detail": "auto"},
            }],
            tools=[COMPUTER_USE_TOOL],
        )

import Lightcone from "@tzafon/lightcone";

const client = new Lightcone();
const computer = await client.computers.create({ kind: "desktop" });
const id = computer.id!;

try {
  // Take initial screenshot
  const initialScreenshot = await client.computers.screenshot(id);
  const screenshotUrl = initialScreenshot.result?.screenshot_url as string;

  // First request — use array input to include the screenshot
  let response = await client.responses.create({
    model: "tzafon.northstar-cua-fast",
    input: [
      {
        role: "user",
        content: [
          { type: "input_text", text: "Open the terminal and run 'uname -a'" },
          { type: "input_image", image_url: screenshotUrl, detail: "auto" },
        ],
      },
    ],
    tools: [{
      type: "computer_use",
      display_width: 1280,
      display_height: 720,
      environment: "desktop",
    }],
  });

  // Loop until done
  while (true) {
    const computerCall = response.output?.find((item) => item.type === "computer_call");
    const message = response.output?.find((item) => item.type === "message");

    if (message) {
      for (const block of message.content ?? []) {
        console.log(`Northstar says: ${block.text}`);
      }
    }

    // No requested action means the model has finished.
    if (!computerCall) {
      console.log("Done.");
      break;
    }

    const action = computerCall.action!;
    console.log(`Executing: ${action.type}`);

    // Set by terminal actions. A `break` inside the switch below only leaves
    // the switch, so the while loop is exited explicitly after the dispatch.
    let finished = false;

    // Execute the action — right-clicks come as click with button: "right"
    if (action.type === "click" && action.button === "right") {
      await client.computers.rightClick(id, { x: action.x!, y: action.y! });
    } else {
      // Action types without a case perform no operation here; the pause
      // before the next screenshot below still applies to them.
      switch (action.type) {
        case "click":
          await client.computers.click(id, { x: action.x!, y: action.y! });
          break;
        case "double_click":
          await client.computers.doubleClick(id, { x: action.x!, y: action.y! });
          break;
        case "type":
          await client.computers.type(id, { text: action.text! });
          break;
        case "key":
        case "keypress":
          await client.computers.hotkey(id, { keys: action.keys! });
          break;
        case "scroll":
          await client.computers.scroll(id, {
            dx: 0, dy: action.scroll_y ?? 0,
            x: action.x ?? 640, y: action.y ?? 400,
          });
          break;
        case "hscroll":
          await client.computers.scroll(id, {
            dx: action.scroll_x ?? 0, dy: 0,
            x: action.x ?? 640, y: action.y ?? 400,
          });
          break;
        case "drag":
          await client.computers.drag(id, {
            x1: action.x!, y1: action.y!,
            x2: action.end_x!, y2: action.end_y!,
          });
          break;
        case "navigate":
          await client.computers.navigate(id, { url: action.url! });
          break;
        case "terminate":
          console.log(`${action.status}: ${action.result}`);
          finished = true;
          break;
        case "answer":
          console.log(`Answer: ${action.result}`);
          finished = true;
          break;
        case "done":
          console.log(`Done: ${action.text}`);
          finished = true;
          break;
      }
    }

    // Terminal actions end the loop without another screenshot or request.
    if (finished) {
      break;
    }

    // Screenshot and feed back.
    // previous_response_id tells the server to include the full prior
    // conversation (including the model's own output), so we only send
    // what's new: the screenshot after the action.
    await new Promise((r) => setTimeout(r, 1000));
    const newScreenshot = await client.computers.screenshot(id);
    const newUrl = newScreenshot.result?.screenshot_url as string;

    response = await client.responses.create({
      model: "tzafon.northstar-cua-fast",
      previous_response_id: response.id!,
      input: [{
        type: "computer_call_output",
        call_id: computerCall.call_id!,
        output: { type: "input_image", image_url: newUrl, detail: "auto" },
      }],
      tools: [{
        type: "computer_use",
        display_width: 1280,
        display_height: 720,
        environment: "desktop",
      }],
    });
  }
} finally {
  // Always release the cloud computer, even if the loop throws.
  await client.computers.delete(id);
}

How it works

┌─────────────┐     instruction + screenshot     ┌─────────────┐
│             │ ──────────────────────────────── > │             │
│  Your Code  │                                   │  Northstar  │
│             │ < ──────────────────────────────── │             │
└─────────────┘     computer_call (action)        └─────────────┘
       │                                                 ^
       │  execute action                                 │
       v                                                 │
┌─────────────┐                                          │
│  Lightcone  │     screenshot of new state              │
│     OS      │ ─────────────────────────────────────────>│
└─────────────┘   (as computer_call_output)

Each iteration:

Northstar looks at the screenshot and decides the next action
You execute the action on the computer
You take a screenshot of the result
You send it back as computer_call_output with previous_response_id
Repeat until the response contains no computer_call (the model replies with only a message) or the requested action is terminate, answer, or done

When you pass previous_response_id, the server automatically prepends the full prior conversation — including the model’s own output (its messages and actions) — to your new input. You only need to send what’s new: the computer_call_output with the screenshot taken after executing the action.

When to use this vs. Tasks

	Computer-use loop (Responses API)	Tasks
Control	You control every step	Fully managed by Northstar
Customization	Add custom logic between steps	Limited to instructions
Observability	Full visibility into every action	Stream events
Complexity	More code to write	One API call
Best for	Custom workflows, hybrid systems	Simple end-to-end work

Computer-Use Loop

The full loop

How it works

When to use this vs. Tasks

See also

What can I help you with?

Suggestions