Scrape Vinted Listings with Python: Search + Pagination + Clean CSV Export
Vinted is a goldmine if you’re doing:
- resale / arbitrage research
- price tracking for brands and sizes
- “new listings” alerts for saved searches
- market snapshots (category → brand → price distribution)
But it’s also a site that will happily serve you a challenge page when it doesn’t like your traffic.
In this guide you’ll build a Vinted scraper in Python that:
- runs a real search query
- paginates results
- extracts the fields you actually use (title, brand, size, price, item URL, image URL)
- exports a clean CSV you can feed into Sheets / Airtable / a DB

Vinted is heavily protected, and direct requests often return challenge pages. ProxiesAPI gives you a reliable fetch layer so your pagination + export pipeline stays predictable as you scale.
What we’re scraping (and why we’re not guessing HTML selectors)
Vinted’s UI is modern and changes frequently. Scraping rendered HTML tends to be brittle.
Instead, we’ll call the same internal endpoint the site uses to load listing results:
https://www.vinted.co.uk/api/v2/catalog/items?search_text=...
Important notes:
- This is not an official public API; it can change without notice.
- Direct calls from your IP often return an HTML challenge page instead of JSON.
- The fix is not “more retries” — you need a stable fetch layer (proxies + backoff + sanity checks).
That’s the exact place ProxiesAPI fits.
Setup
python3 -m venv .venv
source .venv/bin/activate
pip install requests python-dotenv
Create a .env:
PROXIESAPI_KEY="YOUR_PROXIESAPI_KEY"
Step 1: Fetch JSON via ProxiesAPI (with a sanity check)
We’ll wrap ProxiesAPI as a simple “fetch this URL for me” layer.
import os
import time
import urllib.parse
from typing import Any
import requests
from dotenv import load_dotenv
# Pull settings from a local .env file into the environment before reading them.
load_dotenv()

# ProxiesAPI key; an empty string when the variable is not set.
PROXIESAPI_KEY = os.environ.get("PROXIESAPI_KEY", "")

# (connect, read) timeout pair handed to every request.
TIMEOUT = (10, 60)

# One shared session so the default headers below accompany every call.
session = requests.Session()
session.headers.update(
    [
        (
            "User-Agent",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0 Safari/537.36",
        ),
        ("Accept", "application/json,text/plain,*/*"),
        ("Accept-Language", "en-GB,en;q=0.9"),
    ]
)
def proxiesapi_url(target_url: str) -> str:
    """Wrap *target_url* in a ProxiesAPI request URL.

    The API key and the target URL are sent as the ``key`` and ``url`` query
    parameters, percent-encoded by ``urlencode``.

    Raises:
        RuntimeError: If ``PROXIESAPI_KEY`` is empty.
    """
    if PROXIESAPI_KEY:
        query_string = urllib.parse.urlencode(
            [("key", PROXIESAPI_KEY), ("url", target_url)]
        )
        return "https://api.proxiesapi.com/?" + query_string
    raise RuntimeError("Missing PROXIESAPI_KEY env var")
def fetch_json(url: str, *, retries: int = 4, backoff: float = 2.0) -> dict[str, Any]:
    """Fetch *url* through ProxiesAPI and return the decoded JSON body.

    Every failed attempt is reported with ``print``. Between attempts the
    function pauses for ``backoff ** attempt`` seconds; no pause is taken
    after the final attempt because nothing is left to wait for.

    Args:
        url: Target URL to request via the proxy layer.
        retries: Maximum number of attempts.
        backoff: Base of the exponential delay between attempts.

    Returns:
        The value produced by ``Response.json()``.

    Raises:
        RuntimeError: Immediately if the API key is missing (raised by
            ``proxiesapi_url``), or after all attempts failed; in the latter
            case the last underlying error is chained as ``__cause__``.
    """
    # Build the proxied URL once, outside the retry loop: a missing key is a
    # configuration error that no amount of retrying can fix.
    proxied = proxiesapi_url(url)

    last_err: Exception | None = None
    for attempt in range(1, retries + 1):
        try:
            r = session.get(proxied, timeout=TIMEOUT)
            r.raise_for_status()
            # Vinted blocks often come back as HTML. This keeps failures obvious.
            ct = (r.headers.get("content-type") or "").lower()
            if "application/json" not in ct and r.text.lstrip().startswith("<"):
                raise RuntimeError("Got HTML instead of JSON (likely blocked/challenged)")
            return r.json()
        except Exception as e:  # deliberate: any per-attempt failure is retryable
            last_err = e
            if attempt >= retries:
                # Final attempt: report it, but do not sleep before giving up.
                print(f"attempt {attempt}/{retries} failed: {e}")
                break
            sleep_s = backoff ** attempt
            print(f"attempt {attempt}/{retries} failed: {e} -> sleeping {sleep_s:.1f}s")
            time.sleep(sleep_s)
    # Chain the last error so the original traceback stays visible.
    raise RuntimeError(f"Failed after {retries} retries: {last_err}") from last_err
Step 2: Build the Vinted search URL (pagination included)
Vinted search supports a few useful query parameters. The two you need to start:
- search_text (your query)
- page (pagination)
We’ll keep this minimal and easy to extend.
def vinted_search_url(*, query: str, page: int, per_page: int = 24) -> str:
    """Build the catalog-items endpoint URL for one page of search results.

    Args:
        query: Free-text search string, sent as ``search_text``.
        page: Page number, sent as ``page``.
        per_page: Number of results requested per page, sent as ``per_page``.
            Defaults to 24, the value that was previously hard-coded.

    Returns:
        The endpoint URL with a percent-encoded query string.
    """
    base = "https://www.vinted.co.uk/api/v2/catalog/items"
    # urlencode stringifies non-str values, so the ints can be passed as-is.
    params = {
        "search_text": query,
        "page": page,
        "per_page": per_page,
    }
    return base + "?" + urllib.parse.urlencode(params)
Step 3: Parse listings robustly (no brittle assumptions)
The internal response shape may evolve, so we’ll:
- find the first list-like field that looks like “items”
- extract only the fields we care about
- keep raw IDs and URLs so you can rehydrate later
from typing import Any
def pick_items_payload(data: dict[str, Any]) -> list[dict[str, Any]]:
    """Locate the list of listing objects inside a decoded response.

    Lookup order (first match wins, even if that list is empty):
      1. ``data["items"]``
      2. ``data["catalog_items"]``
      3. ``data["data"]["items"]``
      4. the first top-level value that is a non-empty list whose first
         element is a dict

    Returns:
        The matching list object itself, or ``[]`` when nothing matches.
    """
    # Known top-level keys, checked in priority order.
    for key in ("items", "catalog_items"):
        direct = data.get(key)
        if isinstance(direct, list):
            return direct

    # Nested shape: {"data": {"items": [...]}}
    wrapper = data.get("data")
    if isinstance(wrapper, dict):
        nested = wrapper.get("items")
        if isinstance(nested, list):
            return nested

    # Last resort: any list of dicts that looks like listings.
    return next(
        (
            value
            for value in data.values()
            if isinstance(value, list) and value and isinstance(value[0], dict)
        ),
        [],
    )
def to_number(x: Any) -> float | None:
try:
return float(x)
except Exception:
return None
def _nested_label(value: Any) -> Any:
    """Return the ``title`` (or ``name``) of a nested object, or None if *value* is not a dict."""
    if isinstance(value, dict):
        return value.get("title") or value.get("name")
    return None


def parse_listings(data: dict[str, Any]) -> list[dict[str, Any]]:
    """Flatten the listing objects found in *data* into uniform rows.

    The listing list is located with ``pick_items_payload``. For each field,
    a nested object form is tried first and flat keys are used as fallbacks.
    Entries that are not dicts are skipped.

    Args:
        data: Decoded JSON response.

    Returns:
        One dict per listing with the keys ``id``, ``title``, ``brand``,
        ``size``, ``price``, ``currency``, ``url`` and ``photo_url``. A field
        that cannot be found is None.
    """
    out: list[dict[str, Any]] = []
    for it in pick_items_payload(data):
        # The last-resort payload detection only inspects the first element,
        # so later entries are not guaranteed to be dicts.
        if not isinstance(it, dict):
            continue

        item_id = it.get("id") or it.get("item_id")
        title = it.get("title") or it.get("name")

        brand = (
            _nested_label(it.get("brand"))
            or it.get("brand_title")
            or it.get("brand_name")
        )

        raw_size = it.get("size")
        size = _nested_label(raw_size) or it.get("size_title")
        # Use the raw "size" value only when it is flat; a nested object
        # without a usable label must not leak into the row as a dict.
        if not size and not isinstance(raw_size, dict):
            size = raw_size

        price = None
        currency = None
        raw_price = it.get("price")
        if isinstance(raw_price, dict):
            # Test each candidate for None after conversion rather than with
            # ``or`` so that a numeric 0 amount is kept.
            for key in ("amount", "value"):
                price = to_number(raw_price.get(key))
                if price is not None:
                    break
            currency = raw_price.get("currency_code") or raw_price.get("currency")
        if price is None:
            price = to_number(it.get("price_numeric"))
        if price is None and not isinstance(raw_price, dict):
            price = to_number(raw_price)
        # Flat-price responses carry no nested currency; fall back to a
        # top-level "currency" key, mirroring the other fields' fallbacks.
        currency = currency or it.get("currency")

        url = it.get("url") or it.get("item_url")

        photo_url = None
        if isinstance(it.get("photo"), dict):
            photo_url = it["photo"].get("url")
        if not photo_url and isinstance(it.get("photos"), list) and it["photos"]:
            first = it["photos"][0]
            if isinstance(first, dict):
                photo_url = first.get("url")
        photo_url = photo_url or it.get("photo_url") or it.get("image_url")

        out.append({
            "id": item_id,
            "title": title,
            "brand": brand,
            "size": size,
            "price": price,
            "currency": currency,
            "url": url,
            "photo_url": photo_url,
        })
    return out
Step 4: Crawl pages and export to CSV
We’ll stop when a page returns zero items.
import csv
def crawl_search(
    query: str, *, max_pages: int = 10, delay: float = 0.8
) -> list[dict[str, Any]]:
    """Fetch successive result pages for *query* and collect the parsed rows.

    Crawling stops at the first page that yields no listings, or after
    ``max_pages`` pages. A per-page count is reported with ``print``.

    Args:
        query: Search text.
        max_pages: Upper bound on the number of pages requested.
        delay: Seconds to pause between consecutive page requests.

    Returns:
        Rows in crawl order. A row whose ``id`` (int or str) was already
        collected from an earlier page is dropped; rows without such an id
        are always kept.
    """
    all_rows: list[dict[str, Any]] = []
    seen_ids: set[Any] = set()
    for page in range(1, max_pages + 1):
        if page > 1:
            # Be polite, but pause only *between* requests -- never after the
            # last page, where the wait would serve no purpose.
            time.sleep(delay)
        url = vinted_search_url(query=query, page=page)
        data = fetch_json(url)
        rows = parse_listings(data)
        print(f"page {page}: {len(rows)} listings")
        if not rows:
            break
        for row in rows:
            item_id = row.get("id")
            # Pages can overlap if the result set shifts while crawling, so
            # drop repeats. Only scalar ids are tracked (hashable, unambiguous).
            if isinstance(item_id, (int, str)):
                if item_id in seen_ids:
                    continue
                seen_ids.add(item_id)
            all_rows.append(row)
    return all_rows
def write_csv(path: str, rows: list[dict[str, Any]]) -> None:
    """Write *rows* to a UTF-8 CSV file at *path*, header line included.

    Only the fixed export columns are written; other keys in a row are
    ignored and absent keys produce empty cells.

    Raises:
        RuntimeError: If *rows* is empty.
    """
    if not rows:
        raise RuntimeError("No rows to write")

    columns = ("id", "title", "brand", "size", "price", "currency", "url", "photo_url")
    with open(path, mode="w", encoding="utf-8", newline="") as handle:
        writer = csv.DictWriter(handle, fieldnames=columns)
        writer.writeheader()
        # Project each row onto the export columns before handing it over.
        writer.writerows(
            {column: row.get(column) for column in columns} for row in rows
        )
if __name__ == "__main__":
    # Demo run: crawl up to five pages for a sample query, then export them.
    listings = crawl_search("nike dunk", max_pages=5)
    write_csv("vinted_listings.csv", listings)
    total = len(listings)
    print("wrote vinted_listings.csv", total)
Debugging: when you get HTML instead of JSON
If you see errors like:
- “Got HTML instead of JSON”
- or your payload starts with
<!doctype html>
You’re being challenged or blocked.
Practical checklist:
- Keep the content-type / HTML sanity check (so failures are loud).
- Add exponential backoff on retries (already included).
- Reduce concurrency (start with 1 request at a time).
- Use a stable proxy/unblock layer (ProxiesAPI).
Next upgrades
- Add filters (brand, price range, category IDs)
- Save to SQLite for incremental updates
- Implement “new listing alerts” by diffing daily runs
- Persist raw JSON alongside the CSV so you can re-parse later when fields change
Vinted is heavily protected, and direct requests often return challenge pages. ProxiesAPI gives you a reliable fetch layer so your pagination + export pipeline stays predictable as you scale.