Skip to content
Dashboard
Use Cases

Web Scraping

Extract structured data from any website, even behind logins and bot detection.

Use Lightcone to scrape websites that block simple HTTP requests. Lightcone’s stealth browsers render JavaScript, handle bot detection, and support residential proxies — so you can extract data from sites that tools like requests or fetch can’t reach.

scrape.py
# scrape.py — extract HTML from a bot-protected page using a Lightcone
# stealth browser (renders JavaScript, supports residential proxies).
from tzafon import Lightcone
import json

client = Lightcone()

# `use_advanced_proxy=True` routes traffic through residential proxies,
# for sites that block datacenter IPs or plain HTTP clients.
with client.computer.create(
    kind="browser",
    use_advanced_proxy=True,
) as computer:
    # Navigate to the target page
    computer.navigate("https://books.toscrape.com/catalogue/category/books/science_22/index.html")
    computer.wait(2)  # give JavaScript-rendered content time to settle

    # Extract the page HTML
    html_result = computer.html()
    html_content = computer.get_html_content(html_result)

    # Take a screenshot for debugging
    result = computer.screenshot()
    print(f"Screenshot: {computer.get_screenshot_url(result)}")
    print(f"HTML length: {len(html_content)} chars")
scrape.ts
// scrape.ts — extract HTML from a bot-protected page using a Lightcone
// stealth browser (renders JavaScript, handles bot detection).
import Lightcone from "@tzafon/lightcone";

const client = new Lightcone();

// Residential-proxy-backed browser for sites that block plain HTTP clients.
const computer = await client.computers.create({
  kind: "browser",
  use_advanced_proxy: true,
});
const id = computer.id!;

try {
  await client.computers.navigate(id, {
    url: "https://books.toscrape.com/catalogue/category/books/science_22/index.html",
  });

  const htmlResult = await client.computers.html(id);
  console.log("HTML retrieved");

  const screenshot = await client.computers.screenshot(id);
  console.log("Screenshot:", screenshot.result?.screenshot_url);
} finally {
  // Always release the remote browser, even if scraping threw.
  await client.computers.delete(id);
}

Handle pagination by clicking “Next” and collecting data across multiple pages:

# Paginate by clicking "Next" and collecting the HTML of each page.
with client.computer.create(kind="browser", use_advanced_proxy=True) as computer:
    computer.navigate("https://books.toscrape.com")
    computer.wait(2)  # let the first page render

    all_pages_html = []
    for page_num in range(1, 4):  # Scrape first 3 pages
        html_result = computer.html()
        all_pages_html.append(computer.get_html_content(html_result))
        print(f"Scraped page {page_num}")

        # Don't click "next" after the final requested page — the original
        # clicked unconditionally and navigated one page past what it scraped.
        if page_num == 3:
            break

        # Click the "next" button
        try:
            computer.click(720, 780)  # Coordinates of the "next" button (illustrative)
            computer.wait(2)
        except Exception:
            print("No more pages")
            break

    print(f"Total pages scraped: {len(all_pages_html)}")
// Paginate by clicking "Next" and collecting the HTML of each page.
const computer = await client.computers.create({
  kind: "browser",
  use_advanced_proxy: true,
});
const id = computer.id!;
const allPagesHtml: string[] = [];

try {
  await client.computers.navigate(id, { url: "https://books.toscrape.com" });

  for (let page = 1; page <= 3; page++) {
    const htmlResult = await client.computers.html(id);
    // BUG FIX: the array was declared but never populated. The exact field
    // holding the markup isn't shown here — serialize the whole result;
    // TODO confirm the response shape against the SDK and extract the HTML field.
    allPagesHtml.push(JSON.stringify(htmlResult));
    console.log(`Scraped page ${page}`);

    // Don't click "next" after the final requested page.
    if (page === 3) break;

    // Click next
    await client.computers.click(id, { x: 720, y: 780 });
    await new Promise((r) => setTimeout(r, 2000));
  }
} finally {
  // Always release the remote browser.
  await client.computers.delete(id);
}
console.log(`Total pages scraped: ${allPagesHtml.length}`);

Coordinates in these examples (like computer.click(720, 780)) are illustrative. Take a screenshot first to find the actual element positions on the page you’re scraping.

Use persistent sessions to maintain login state:

# First run: log in and save the session
with client.computer.create(kind="browser", persistent=True) as computer:
    computer.navigate("https://app.example.com/login")
    computer.wait(2)

    # Fill in the login form by clicking each field, then typing.
    # Coordinates are illustrative — screenshot first on a real page.
    computer.click(400, 300)  # Username field
    computer.type("user@example.com")
    computer.click(400, 360)  # Password field
    computer.type("password123")  # NOTE(review): load credentials from a secret store in real code
    computer.click(400, 420)  # Submit button
    computer.wait(3)

    # Keep the id so the session can be restored later.
    session_id = computer.id
    print(f"Session saved: {session_id}")

# Later runs: restore the session and scrape
with client.computer.create(
    kind="browser",
    environment_id=session_id,
) as computer:
    computer.navigate("https://app.example.com/dashboard")
    computer.wait(2)
    html_result = computer.html()
    # Already logged in — cookies were restored
// First run: log in and save
const session = await client.computers.create({
  kind: "browser",
  persistent: true,
});
const id = session.id!;

// Fill in the login form (coordinates are illustrative).
await client.computers.navigate(id, { url: "https://app.example.com/login" });
await client.computers.click(id, { x: 400, y: 300 });
await client.computers.type(id, { text: "user@example.com" });
await client.computers.click(id, { x: 400, y: 360 });
await client.computers.type(id, { text: "password123" });
await client.computers.click(id, { x: 400, y: 420 });
await client.computers.delete(id);

// Later: restore and scrape
const restored = await client.computers.create({
  kind: "browser",
  environment_id: id,
});
await client.computers.navigate(restored.id!, {
  url: "https://app.example.com/dashboard",
});
// Already logged in

For complex scraping tasks, let the agent figure out the navigation:

# Delegate navigation to the agent: stream task events as they arrive.
for event in client.agent.tasks.start_stream(
    instruction=(
        "Go to https://books.toscrape.com. "
        "Find all books in the 'Science' category. "
        "For each book, note the title and price. "
        "Report the results."
    ),
    kind="browser",
):
    print(event)
// Delegate navigation to the agent: stream task events as they arrive.
const stream = await client.agent.tasks.startStream({
  instruction:
    "Go to https://books.toscrape.com. " +
    "Find all books in the 'Science' category. " +
    "For each book, note the title and price. " +
    "Report the results.",
  kind: "browser",
});

for await (const event of stream) {
  console.log(event);
}
  • Use use_advanced_proxy: true for sites with bot detection
  • Add computer.wait() after navigation to let pages fully render
  • Take screenshots before interacting to verify the page state
  • Use persistent sessions to avoid re-authenticating on every run
  • Use the Playwright integration when you need CSS selectors instead of coordinates