Process Thousands of Screenshots: Bulk API Guide

Christian Mesa
Feb 26, 2026
5 min read

When You Need Screenshots at Scale

Some use cases demand thousands or tens of thousands of screenshots: monitoring a large portfolio of websites, generating thumbnails for a directory, creating visual archives, or running visual regression tests across hundreds of pages.

Processing this volume requires more than a simple for-loop. You need concurrency control, error handling, rate limiting, and progress tracking.

The Naive Approach (Don't Do This)

// ❌ Sequential — painfully slow
// Each `await` blocks the next request, so total time grows linearly:
// (number of URLs) × (seconds per screenshot).
for (const url of urls) {
  const screenshot = await takeScreenshot(url);
  saveScreenshot(screenshot);
}
// 10,000 URLs × 3 seconds each = 8+ hours

The Right Approach: Controlled Concurrency

Node.js with p-limit

const axios = require('axios');
const pLimit = require('p-limit');
const fs = require('fs');

// Read the key from the environment — never hard-code credentials.
const API_KEY = process.env.DEVTOOLBOX_API_KEY;
const CONCURRENCY = 10; // Parallel requests
// p-limit returns a wrapper that caps how many wrapped calls run at once.
const limit = pLimit(CONCURRENCY);

// Capture one screenshot via the API, retrying with exponential backoff.
// Never throws: resolves to { url, success, data } or { url, success, error }.
async function takeScreenshot(url, retries = 3) {
  let attempt = 0;
  while (attempt < retries) {
    attempt += 1;
    try {
      const response = await axios.post(
        'https://api.toolcenter.dev/v1/screenshot',
        { url, width: 1280, height: 800, format: 'png' },
        {
          headers: { 'Authorization': `Bearer ${API_KEY}` },
          responseType: 'arraybuffer',
          timeout: 30000,
        }
      );
      return { url, success: true, data: response.data };
    } catch (error) {
      // Out of attempts — report the failure instead of throwing.
      if (attempt === retries) {
        return { url, success: false, error: error.message };
      }
      // Back off 2s, 4s, 8s… before the next attempt.
      await sleep(Math.pow(2, attempt) * 1000);
    }
  }
}

// Promise-based delay: resolves (with undefined) after `ms` milliseconds.
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}

// Screenshot every URL with bounded concurrency, writing PNGs to ./screenshots.
// Returns the array of per-URL result objects from takeScreenshot().
async function processUrls(urls) {
  // Ensure the output directory exists before any worker tries to write.
  fs.mkdirSync('./screenshots', { recursive: true });

  let completed = 0;

  const results = await Promise.all(
    urls.map(url =>
      limit(async () => {
        const result = await takeScreenshot(url);
        completed++;

        // Log every 100 URLs so long batches stay observable.
        if (completed % 100 === 0) {
          console.log(`Progress: ${completed}/${urls.length} (${((completed/urls.length)*100).toFixed(1)}%)`);
        }

        if (result.success) {
          const filename = urlToFilename(url);
          // Bug fix: the path must interpolate the filename — the original
          // wrote every image to the same literal path.
          fs.writeFileSync(`./screenshots/${filename}`, result.data);
        }

        return result;
      })
    )
  );

  const succeeded = results.filter(r => r.success).length;
  const failed = results.length - succeeded;

  console.log(`\nComplete: ${succeeded} succeeded, ${failed} failed`);
  return results;
}

// Derive a filesystem-safe .png filename from a URL: strip the scheme,
// map every non-alphanumeric character to '_', cap the stem at 100 chars.
function urlToFilename(url) {
  const stem = url
    .replace(/https?:\/\//, '')
    .replace(/[^a-zA-Z0-9]/g, '_')
    .slice(0, 100);
  return `${stem}.png`;
}

Python with asyncio

import asyncio
import aiohttp
import os
from urllib.parse import urlparse

API_KEY = os.environ['DEVTOOLBOX_API_KEY']  # raises KeyError if unset — fail fast
CONCURRENCY = 10
# Module-level semaphore caps in-flight requests across all tasks.
semaphore = asyncio.Semaphore(CONCURRENCY)

async def take_screenshot(session, url, retries=3):
    """Capture one screenshot via the API, retrying with exponential backoff.

    Always returns a dict: ``{'url', 'success': True, 'data'}`` on success
    or ``{'url', 'success': False, 'error'}`` on failure. Never raises.
    """
    async with semaphore:  # cap global concurrency
        for attempt in range(retries):
            try:
                async with session.post(
                    'https://api.toolcenter.dev/v1/screenshot',
                    json={'url': url, 'width': 1280, 'height': 800, 'format': 'png'},
                    headers={'Authorization': f'Bearer {API_KEY}'},
                    timeout=aiohttp.ClientTimeout(total=30)
                ) as response:
                    if response.status == 200:
                        data = await response.read()
                        return {'url': url, 'success': True, 'data': data}
                    elif response.status == 429:
                        # Rate limited — wait and retry
                        await asyncio.sleep(2 ** (attempt + 1))
                        continue
                    else:
                        return {'url': url, 'success': False, 'error': f'HTTP {response.status}'}
            except Exception as e:
                if attempt == retries - 1:
                    return {'url': url, 'success': False, 'error': str(e)}
                await asyncio.sleep(2 ** attempt)
        # Bug fix: if every attempt hit HTTP 429, the original loop fell
        # through and implicitly returned None, crashing callers that index
        # result['success']. Report an explicit failure instead.
        return {'url': url, 'success': False, 'error': 'HTTP 429 (retries exhausted)'}

async def process_urls(urls):
    """Screenshot every URL concurrently, writing images to ./screenshots.

    Returns the list of per-URL result dicts from take_screenshot().
    """
    os.makedirs('screenshots', exist_ok=True)
    completed = 0

    async with aiohttp.ClientSession() as session:
        tasks = [take_screenshot(session, url) for url in urls]
        results = []

        # as_completed yields results as each finishes, so progress
        # reporting is not blocked behind the slowest URL.
        for coro in asyncio.as_completed(tasks):
            result = await coro
            completed += 1

            if result['success']:
                filename = url_to_filename(result['url'])
                # Bug fix: the original wrote every image to the literal
                # path 'screenshots/(unknown)' — the filename was never
                # interpolated into the f-string.
                with open(f'screenshots/{filename}', 'wb') as f:
                    f.write(result['data'])

            if completed % 100 == 0:
                print(f'Progress: {completed}/{len(urls)}')

            results.append(result)

    succeeded = sum(1 for r in results if r['success'])
    print(f'Done: {succeeded}/{len(urls)} succeeded')
    return results

def url_to_filename(url):
    """Build a filesystem-safe .png filename from a URL's host and path."""
    parts = urlparse(url)
    stem = (parts.netloc + parts.path).replace('/', '_')
    # Cap the stem at 100 characters to stay well under filesystem limits.
    return stem[:100] + '.png'

# Run it
# Fix: the original `open('urls.txt').read()` leaked the file handle and
# kept interior blank lines as bogus empty URLs. Use a context manager and
# skip blanks.
with open('urls.txt') as fh:
    urls = [line.strip() for line in fh if line.strip()]
asyncio.run(process_urls(urls))

Rate Limiting and Backoff

Respect API rate limits to avoid getting blocked:

class RateLimiter {
  /**
   * Sliding-window rate limiter: allows at most `maxRequests` calls
   * per `windowMs` milliseconds.
   */
  constructor(maxRequests, windowMs) {
    this.maxRequests = maxRequests;
    this.windowMs = windowMs;
    this.requests = []; // timestamps of requests inside the current window
  }

  /** Resolve once the caller may proceed; records the grant timestamp. */
  async waitForSlot() {
    // Loop instead of a single check: the original slept once and then
    // pushed unconditionally, so several concurrent callers waking at the
    // same moment could all claim the one freed slot and exceed the limit.
    for (;;) {
      const now = Date.now();
      this.requests = this.requests.filter(t => t > now - this.windowMs);

      if (this.requests.length < this.maxRequests) {
        this.requests.push(Date.now());
        return;
      }

      // Sleep until the oldest timestamp ages out of the window, then re-check.
      const waitTime = this.requests[0] + this.windowMs - now;
      await new Promise(resolve => setTimeout(resolve, Math.max(waitTime, 1)));
    }
  }
}

// 100 requests per minute
const rateLimiter = new RateLimiter(100, 60000);

// Take a screenshot only once the shared limiter grants a slot.
async function rateLimitedScreenshot(url) {
  await rateLimiter.waitForSlot();
  const result = await takeScreenshot(url);
  return result;
}

Resumable Processing

For very large batches, save progress to resume after failures:

const fs = require('fs');

class BatchProcessor {
  /**
   * Resumable batch runner: records completed/failed URLs in a JSON file
   * so an interrupted run can pick up where it left off.
   */
  constructor(progressFile = 'progress.json') {
    this.progressFile = progressFile;
    this.progress = this.loadProgress();
  }

  /** Load saved progress; fall back to an empty state if the file is
   *  missing or unreadable (first run). */
  loadProgress() {
    try {
      return JSON.parse(fs.readFileSync(this.progressFile, 'utf-8'));
    } catch {
      return { completed: [], failed: [] };
    }
  }

  /** Persist current progress as pretty-printed JSON. */
  saveProgress() {
    fs.writeFileSync(this.progressFile, JSON.stringify(this.progress, null, 2));
  }

  /** Process every URL not already completed, checkpointing as we go. */
  async process(urls) {
    // Perf fix: Array.includes inside filter made the skip check O(n²)
    // for large batches; a Set gives O(1) membership tests.
    const done = new Set(this.progress.completed);
    const remaining = urls.filter(url => !done.has(url));

    console.log(`${remaining.length} URLs remaining (${this.progress.completed.length} already done)`);

    for (const url of remaining) {
      const result = await takeScreenshot(url);

      if (result.success) {
        this.progress.completed.push(url);
      } else {
        this.progress.failed.push({ url, error: result.error });
      }

      // Save progress every 50 URLs
      if ((this.progress.completed.length + this.progress.failed.length) % 50 === 0) {
        this.saveProgress();
      }
    }

    this.saveProgress();
  }
}

Storing Results Efficiently

Upload to S3

const { S3Client, PutObjectCommand } = require('@aws-sdk/client-s3');

const s3 = new S3Client({ region: 'us-east-1' });

// Persist one screenshot to S3 under the screenshots/ prefix.
async function uploadToS3(key, data) {
  const command = new PutObjectCommand({
    Bucket: 'my-screenshots-bucket',
    Key: `screenshots/${key}`,
    Body: data,
    ContentType: 'image/png',
  });
  await s3.send(command);
}

Compress Before Storage

const sharp = require('sharp');

async function compressScreenshot(pngBuffer) {
  return sharp(pngBuffer)
    .resize(1280, 800, { fit: 'inside' })
    .webp({ quality: 80 })
    .toBuffer();
}

Monitoring and Alerting

Track your batch processing metrics:

class BatchMetrics {
  /** Collects success/failure counts, byte totals, and throughput. */
  constructor() {
    this.startTime = Date.now();
    this.succeeded = 0;
    this.failed = 0;
    this.totalBytes = 0;
  }

  /** Fold one screenshot result ({ success, data? }) into the counters. */
  record(result) {
    if (result.success) {
      this.succeeded++;
      this.totalBytes += result.data.length;
    } else {
      this.failed++;
    }
  }

  /** Human-readable snapshot. Safe to call before any results arrive. */
  summary() {
    const elapsed = (Date.now() - this.startTime) / 1000;
    const total = this.succeeded + this.failed;
    // Robustness fix: guard the divisions — with zero results (or a
    // sub-millisecond run) the original produced "NaN%" and
    // "Infinity screenshots/min".
    const successRate = total === 0 ? '0.0%' : `${((this.succeeded / total) * 100).toFixed(1)}%`;
    const rate = elapsed > 0 ? (total / elapsed * 60).toFixed(0) : '0';
    return {
      total,
      succeeded: this.succeeded,
      failed: this.failed,
      successRate,
      elapsed: `${elapsed.toFixed(0)}s`,
      rate: `${rate} screenshots/min`,
      totalSize: `${(this.totalBytes / 1024 / 1024).toFixed(1)} MB`,
    };
  }
}

Performance Tips

  1. Tune concurrency — Start with 10 parallel requests and increase until you hit rate limits
  2. Use WebP format — 30-50% smaller than PNG with minimal quality loss
  3. Skip full-page — Viewport-only screenshots are faster than full-page
  4. Batch by domain — Group URLs by domain to benefit from connection reuse
  5. Use regional endpoints — Choose an API region closest to your target sites

Conclusion

Processing screenshots at scale requires controlled concurrency, robust error handling, and efficient storage. The ToolCenter API handles the rendering complexity — your job is to orchestrate requests efficiently. With the patterns in this guide, you can process tens of thousands of screenshots reliably, whether it's a one-time batch or a recurring pipeline.

Share this article

CM

Christian Mesa

Founder & Developer at ToolCenter

Full-stack developer from the Canary Islands, Spain. Building developer tools and APIs that simplify web development. Passionate about clean code, performance, and making complex things simple.

Try ToolCenter APIs Free

100 API calls/month free. No credit card required.

Related Posts