Process Thousands of Screenshots: Bulk API Guide

## When You Need Screenshots at Scale

Some use cases demand thousands or tens of thousands of screenshots: monitoring a large portfolio of websites, generating thumbnails for a directory, creating visual archives, or running visual regression tests across hundreds of pages.

Processing this volume requires more than a simple for-loop. You need concurrency control, error handling, rate limiting, and progress tracking.

## The Naive Approach (Don't Do This)

```javascript
// ❌ Sequential — painfully slow
for (const url of urls) {
  const screenshot = await takeScreenshot(url);
  saveScreenshot(screenshot);
}
// 10,000 URLs × 3 seconds each = 8+ hours
```

## The Right Approach: Controlled Concurrency

### Node.js with p-limit

```javascript
const axios = require('axios');
// p-limit v4+ is ESM-only; `require` works with p-limit v3 or earlier.
const pLimit = require('p-limit');
const fs = require('fs');

const API_KEY = process.env.DEVTOOLBOX_API_KEY;
const CONCURRENCY = 10; // Parallel requests

const limit = pLimit(CONCURRENCY);

/**
 * Capture one screenshot, retrying transient failures with exponential backoff.
 * Never throws.
 *
 * @param {string} url - Page to capture.
 * @param {number} [retries=3] - Maximum number of attempts.
 * @returns {Promise<{url: string, success: boolean, data?: Buffer, error?: string}>}
 */
async function takeScreenshot(url, retries = 3) {
  for (let attempt = 1; attempt <= retries; attempt++) {
    try {
      const response = await axios.post(
        'https://api.toolcenter.dev/v1/screenshot',
        { url, width: 1280, height: 800, format: 'png' },
        {
          headers: { 'Authorization': `Bearer ${API_KEY}` },
          responseType: 'arraybuffer',
          timeout: 30000,
        }
      );
      return { url, success: true, data: response.data };
    } catch (error) {
      // `error.response` is only set when the server actually answered.
      const status = error.response ? error.response.status : undefined;
      // A client error (other than timeout / rate limit) will fail the same
      // way on every retry, so give up immediately instead of backing off.
      const isPermanent =
        status >= 400 && status < 500 && status !== 408 && status !== 429;

      if (isPermanent || attempt === retries) {
        return { url, success: false, error: error.message };
      }

      // Exponential backoff
      await sleep(Math.pow(2, attempt) * 1000);
    }
  }

  // Only reachable when `retries` is less than 1.
  return { url, success: false, error: 'No attempts were made' };
}

/** Resolve after `ms` milliseconds. */
function sleep(ms) {
  return new Promise(resolve => setTimeout(resolve, ms));
}

/**
 * Capture every URL with at most CONCURRENCY requests in flight and write
 * each successful image into ./screenshots.
 *
 * Note: each successful result keeps its image buffer in `data`; for very
 * large batches, drop it from the results you retain once it is on disk.
 *
 * @param {string[]} urls - Pages to capture.
 * @returns {Promise<Array<{url: string, success: boolean, data?: Buffer, error?: string}>>}
 */
async function processUrls(urls) {
  // Create the output directory up front so the first write cannot fail.
  fs.mkdirSync('./screenshots', { recursive: true });

  let completed = 0;

  const results = await Promise.all(
    urls.map(url =>
      limit(async () => {
        let result = await takeScreenshot(url);

        if (result.success) {
          const filename = urlToFilename(url);
          try {
            // Asynchronous write: a synchronous one would stall every
            // other in-flight request while the file is flushed.
            await fs.promises.writeFile(`./screenshots/${filename}`, result.data);
          } catch (err) {
            // Record the failure instead of rejecting the whole batch.
            result = { url, success: false, error: `Write failed: ${err.message}` };
          }
        }

        completed++;
        if (completed % 100 === 0) {
          console.log(`Progress: ${completed}/${urls.length} (${((completed/urls.length)*100).toFixed(1)}%)`);
        }

        return result;
      })
    )
  );

  const succeeded = results.filter(r => r.success).length;
  const failed = results.filter(r => !r.success).length;

  console.log(`\nComplete: ${succeeded} succeeded, ${failed} failed`);
  return results;
}

/**
 * Derive a filesystem-safe PNG filename from a URL: strip the scheme,
 * replace every non-alphanumeric character with `_`, cap at 100 characters.
 */
function urlToFilename(url) {
  return url.replace(/https?:\/\//, '').replace(/[^a-zA-Z0-9]/g, '_').slice(0, 100) + '.png';
}
```

### Python with asyncio

```python
import asyncio
import aiohttp
import os
from urllib.parse import urlparse

API_KEY = os.environ['DEVTOOLBOX_API_KEY']
CONCURRENCY = 10

# On Python < 3.10, create this inside the running event loop instead.
semaphore = asyncio.Semaphore(CONCURRENCY)


async def take_screenshot(session, url, retries=3):
    """Capture one screenshot, retrying rate limits and transport errors.

    Always returns a dict with 'url' and 'success', plus 'data' on success
    or 'error' on failure. Never returns None and never raises.
    """
    error = 'No attempts were made'

    async with semaphore:
        for attempt in range(retries):
            try:
                async with session.post(
                    'https://api.toolcenter.dev/v1/screenshot',
                    json={'url': url, 'width': 1280, 'height': 800, 'format': 'png'},
                    headers={'Authorization': f'Bearer {API_KEY}'},
                    timeout=aiohttp.ClientTimeout(total=30)
                ) as response:
                    if response.status == 200:
                        data = await response.read()
                        return {'url': url, 'success': True, 'data': data}
                    if response.status != 429:
                        # Any other status will not change on retry.
                        return {'url': url, 'success': False, 'error': f'HTTP {response.status}'}
                    # Rate limited — wait and retry
                    error = 'HTTP 429'
            except Exception as e:
                error = str(e)

            # Back off before the next attempt, but not after the last one.
            if attempt < retries - 1:
                await asyncio.sleep(2 ** (attempt + 1))

    # Retries exhausted (including a 429 on the final attempt).
    return {'url': url, 'success': False, 'error': error}


async def process_urls(urls):
    """Capture every URL and write each successful image into ./screenshots."""
    os.makedirs('screenshots', exist_ok=True)
    completed = 0

    async with aiohttp.ClientSession() as session:
        tasks = [take_screenshot(session, url) for url in urls]
        results = []

        for coro in asyncio.as_completed(tasks):
            result = await coro
            completed += 1

            if result['success']:
                filename = url_to_filename(result['url'])
                with open(f'screenshots/{filename}', 'wb') as f:
                    f.write(result['data'])

            if completed % 100 == 0:
                print(f'Progress: {completed}/{len(urls)}')

            results.append(result)

    succeeded = sum(1 for r in results if r['success'])
    print(f'Done: {succeeded}/{len(urls)} succeeded')
    return results


def url_to_filename(url):
    """Build a PNG filename from the URL's host and path, capped at 100 characters."""
    parsed = urlparse(url)
    name = f"{parsed.netloc}{parsed.path}".replace('/', '_')[:100]
    return f"{name}.png"


# Run it
if __name__ == '__main__':
    with open('urls.txt') as f:
        # Skip blank lines so they are not submitted as URLs.
        urls = [line.strip() for line in f if line.strip()]
    asyncio.run(process_urls(urls))
```

## Rate Limiting and Backoff

Respect API rate limits to avoid getting blocked:

```javascript
/**
 * Sliding-window rate limiter: allows at most `maxRequests` calls to pass
 * `waitForSlot()` within any `windowMs` milliseconds.
 * Uses sleep() from the p-limit example above.
 */
class RateLimiter {
  constructor(maxRequests, windowMs) {
    if (!(maxRequests >= 1)) {
      throw new RangeError('maxRequests must be at least 1');
    }
    this.maxRequests = maxRequests;
    this.windowMs = windowMs;
    this.requests = [];
  }

  /** Resolve once the caller may send a request; records the request time. */
  async waitForSlot() {
    // Re-check after every wait: several callers can be sleeping on the same
    // expiry, and only as many as there are free slots may proceed.
    for (;;) {
      const now = Date.now();
      this.requests = this.requests.filter(t => t > now - this.windowMs);

      if (this.requests.length < this.maxRequests) {
        this.requests.push(now);
        return;
      }

      const oldestExpiry = this.requests[0] + this.windowMs;
      await sleep(Math.max(oldestExpiry - now, 1));
    }
  }
}

// 100 requests per minute
const rateLimiter = new RateLimiter(100, 60000);

/** Take a screenshot once the rate limiter grants a slot. */
async function rateLimitedScreenshot(url) {
  await rateLimiter.waitForSlot();
  return takeScreenshot(url);
}
```

## Resumable Processing

For very large batches, save progress to resume after failures:

```javascript
const fs = require('fs');

/**
 * Processes URLs one at a time and checkpoints to a JSON file so an
 * interrupted run can pick up where it left off.
 */
class BatchProcessor {
  constructor(progressFile = 'progress.json') {
    this.progressFile = progressFile;
    this.progress = this.loadProgress();
  }

  /** Read saved progress, or start fresh when none is usable. */
  loadProgress() {
    try {
      return JSON.parse(fs.readFileSync(this.progressFile, 'utf-8'));
    } catch (err) {
      // A missing file is the normal first-run case; anything else is worth
      // surfacing because it means earlier progress is being discarded.
      if (err.code !== 'ENOENT') {
        console.warn(`Ignoring unreadable progress file ${this.progressFile}: ${err.message}`);
      }
      return { completed: [], failed: [] };
    }
  }

  /** Persist progress atomically. */
  saveProgress() {
    // Write to a temp file and rename, so a crash mid-write cannot leave a
    // truncated progress file behind.
    const tmpFile = `${this.progressFile}.tmp`;
    fs.writeFileSync(tmpFile, JSON.stringify(this.progress, null, 2));
    fs.renameSync(tmpFile, this.progressFile);
  }

  /**
   * Process every URL not already recorded as completed.
   * @param {string[]} urls - Full list of URLs for the batch.
   */
  async process(urls) {
    // Set lookup keeps the filter linear instead of O(n²) on large batches.
    const done = new Set(this.progress.completed);
    const remaining = urls.filter(url => !done.has(url));

    // Previously failed URLs are retried below; drop their old entries so a
    // resumed run does not record the same failure twice.
    const retrying = new Set(remaining);
    this.progress.failed = this.progress.failed.filter(f => !retrying.has(f.url));

    console.log(`${remaining.length} URLs remaining (${this.progress.completed.length} already done)`);

    let processed = 0;
    for (const url of remaining) {
      const result = await takeScreenshot(url);

      if (result.success) {
        // Persist result.data (disk, S3, ...) here before marking the URL done.
        this.progress.completed.push(url);
      } else {
        this.progress.failed.push({ url, error: result.error });
      }

      // Save progress every 50 URLs
      processed++;
      if (processed % 50 === 0) {
        this.saveProgress();
      }
    }

    this.saveProgress();
  }
}
```

## Storing Results Efficiently

### Upload to S3

```javascript
const { S3Client, PutObjectCommand } = require('@aws-sdk/client-s3');

const s3 = new S3Client({ region: 'us-east-1' });

/** Upload one PNG buffer to the bucket under the `screenshots/` prefix. */
async function uploadToS3(key, data) {
  await s3.send(new PutObjectCommand({
    Bucket: 'my-screenshots-bucket',
    Key: `screenshots/${key}`,
    Body: data,
    ContentType: 'image/png',
  }));
}
```

### Compress Before Storage

```javascript
const sharp = require('sharp');

/** Re-encode a PNG buffer as WebP (quality 80), fitted inside 1280×800. */
async function compressScreenshot(pngBuffer) {
  return sharp(pngBuffer)
    .resize(1280, 800, { fit: 'inside' })
    .webp({ quality: 80 })
    .toBuffer();
}
```

## Monitoring and Alerting

Track your batch processing metrics:

```javascript
/** Accumulates success/failure counts and bytes for one batch run. */
class BatchMetrics {
  constructor() {
    this.startTime = Date.now();
    this.succeeded = 0;
    this.failed = 0;
    this.totalBytes = 0;
  }

  /** Record one result object as returned by takeScreenshot(). */
  record(result) {
    if (result.success) {
      this.succeeded++;
      this.totalBytes += result.data.length;
    } else {
      this.failed++;
    }
  }

  /** Return a human-readable snapshot of the metrics so far. */
  summary() {
    const elapsed = (Date.now() - this.startTime) / 1000;
    const total = this.succeeded + this.failed;
    // Guard the divisions so an empty or instantaneous batch reports 0
    // rather than NaN% or Infinity.
    const successPct = total === 0 ? 0 : (this.succeeded / total) * 100;
    const perMinute = elapsed === 0 ? 0 : (total / elapsed) * 60;

    return {
      total,
      succeeded: this.succeeded,
      failed: this.failed,
      successRate: `${successPct.toFixed(1)}%`,
      elapsed: `${elapsed.toFixed(0)}s`,
      rate: `${perMinute.toFixed(0)} screenshots/min`,
      totalSize: `${(this.totalBytes / 1024 / 1024).toFixed(1)} MB`,
    };
  }
}
```

## Performance Tips

1. **Tune concurrency** — Start with 10 parallel requests and increase until you hit rate limits
2. **Use WebP format** — 30-50% smaller than PNG with minimal quality loss
3. **Skip full-page** — Viewport-only screenshots are faster than full-page
4. **Batch by domain** — Group URLs by domain to benefit from connection reuse
5. **Use regional endpoints** — Choose an API region closest to your target sites

## Conclusion

Processing screenshots at scale requires controlled concurrency, robust error handling, and efficient storage.
The ToolCenter API handles the rendering complexity — your job is to orchestrate requests efficiently. With the patterns in this guide, you can process tens of thousands of screenshots reliably, whether it's a one-time batch or a recurring pipeline.