Scrape Vinted Listings with Python: Search, Prices, Images, and Pagination
Vinted is a goldmine if you want second-hand market data:
- prices by brand/size/category
- listing velocity (what sells quickly)
- seller inventory patterns
- image datasets (for ML or QA)
But it’s also the kind of site where:
- search result pages are paginated
- you’ll quickly do many requests
- rate limits and occasional blocks are normal
In this guide we’ll scrape Vinted search listings into a clean dataset using Python.
We’ll:
- fetch search pages (timeouts + retries)
- parse listing cards (title, price, size, condition, seller, image)
- paginate through result pages
- export JSON/CSV
- use ProxiesAPI as a proxy-backed fetch layer for stability
Marketplaces can throttle repeated requests from a single IP. ProxiesAPI lets you proxy your fetches so pagination runs are less likely to die mid-crawl.
What we’re scraping (Vinted search results)
A Vinted search URL often looks like:
https://www.vinted.com/catalog?search_text=nike%20air%20force
(Depending on locale, you may see different paths/domains.)
From each listing card we want:
id, title, price (+ currency), brand, size, condition, seller (name or handle), image_url, item_url
Setup
python3 -m venv .venv
source .venv/bin/activate
pip install requests beautifulsoup4 lxml
Step 1: Fetch layer with retries (and optional ProxiesAPI)
As soon as you paginate search results (page 1 → 2 → 3 …), the network layer becomes the bottleneck.
Here’s a simple fetch helper that supports a ProxiesAPI “fetch URL”.
import os
import random
import time
import urllib.parse
import requests
TIMEOUT = (10, 35)
def build_proxiesapi_url(target_url: str) -> str:
    """Wrap *target_url* in a ProxiesAPI fetch URL.

    Reads the API key from the PROXIESAPI_KEY environment variable.

    Raises:
        RuntimeError: if PROXIESAPI_KEY is unset or empty.
    """
    api_key = os.environ.get("PROXIESAPI_KEY")
    if not api_key:
        raise RuntimeError("Missing PROXIESAPI_KEY env var")
    query = urllib.parse.urlencode({"auth_key": api_key, "url": target_url})
    return f"https://api.proxiesapi.com/?{query}"
def is_likely_blocked(html: str) -> bool:
    """Heuristic check: does this HTML look like a captcha/bot-block page?"""
    markers = ("captcha", "access denied", "unusual traffic", "verify you are")
    lowered = (html or "").lower()
    for marker in markers:
        if marker in lowered:
            return True
    return False
def fetch_html(url: str, *, use_proxiesapi: bool = True, session: requests.Session | None = None) -> str:
    """Fetch a URL and return its HTML, retrying with exponential backoff.

    Args:
        url: Target page URL.
        use_proxiesapi: Route the request through ProxiesAPI (requires the
            PROXIESAPI_KEY env var — see build_proxiesapi_url).
        session: Optional requests.Session to reuse connections across calls.

    Returns:
        The page HTML as text.

    Raises:
        RuntimeError: after all retry attempts fail, or when every attempt
            returned a page that looks like a bot block.
    """
    s = session or requests.Session()
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
    }
    fetch_url = build_proxiesapi_url(url) if use_proxiesapi else url
    last_err: Exception | None = None
    max_attempts = 5
    for attempt in range(1, max_attempts + 1):
        try:
            r = s.get(fetch_url, headers=headers, timeout=TIMEOUT)
            r.raise_for_status()
            html = r.text
            if is_likely_blocked(html):
                raise RuntimeError("Blocked page detected")
            return html
        except Exception as e:  # broad on purpose: retry any transport/HTTP/block failure
            last_err = e
            # Fix: don't sleep after the final attempt — fail fast instead of
            # burning up to ~20s of backoff right before raising.
            if attempt < max_attempts:
                time.sleep(min(2 ** attempt, 20) + random.random())
    raise RuntimeError(f"Failed to fetch after retries: {last_err}")
Set your ProxiesAPI key:
export PROXIESAPI_KEY="YOUR_KEY"
Step 2: Parse listing cards
Vinted card markup can change, so we use a few techniques:
- extract URLs/images from a/img tags inside a card
- parse price text as a string and normalize
- avoid depending on one long CSS class chain
import re
from bs4 import BeautifulSoup
def clean_text(s: str) -> str:
    """Collapse runs of whitespace into single spaces and trim the ends."""
    text = s or ""
    return " ".join(text.split())
def parse_price(text: str) -> dict:
    """Extract a price from free text.

    Handles both symbol-first ("€12.00") and symbol-last ("12,00 €") styles.

    Returns:
        A dict with keys "currency", "amount", and "raw" (the cleaned input
        text). "currency" and "amount" are None when no price is found.
        Fix: "raw" is now present on every path — previously only the
        no-match branch included it, so callers got an inconsistent shape.
    """
    t = clean_text(text)
    # Symbol-first: "€12.00", "$ 5", "£9,99"
    m = re.search(r"([€$£])\s*([0-9]+(?:[\.,][0-9]{1,2})?)", t)
    if m:
        return {"currency": m.group(1), "amount": float(m.group(2).replace(",", ".")), "raw": t}
    # Symbol-last: "12,00 €"
    m = re.search(r"([0-9]+(?:[\.,][0-9]{1,2})?)\s*([€$£])", t)
    if m:
        return {"currency": m.group(2), "amount": float(m.group(1).replace(",", ".")), "raw": t}
    return {"currency": None, "amount": None, "raw": t}
def parse_search_results(html: str, base_url: str = "https://www.vinted.com") -> list[dict]:
    """Parse a Vinted search-results page into listing dicts.

    Args:
        html: Raw HTML of a search-results page.
        base_url: Origin used to absolutize relative item links.

    Returns:
        A list of dicts with keys id, title, price, currency, image_url,
        item_url — de-duplicated by item_url. Fields may be None when the
        card markup doesn't expose them.
    """
    from urllib.parse import urljoin  # local import so the snippet is self-contained

    soup = BeautifulSoup(html, "lxml")
    items: list[dict] = []
    # Heuristic: listing cards usually contain an <a> to /items/...
    for a in soup.select("a[href*='/items/']"):
        href = a.get("href")
        if not href:
            continue
        # Fix: urljoin handles absolute, root-relative, protocol-relative,
        # and bare-relative hrefs; naive concatenation broke hrefs that
        # didn't start with "/".
        item_url = urljoin(base_url.rstrip("/") + "/", href)
        # Climb to a plausible card container (article or div[data-testid]).
        card = a
        for _ in range(5):
            if card.name == "article" or (card.name == "div" and card.get("data-testid")):
                break
            card = card.parent
            if not card:
                break
        # Title: sometimes in the title attribute, sometimes nearby text.
        title = clean_text(a.get("title") or a.get_text(" ", strip=True) or "")
        if len(title) < 3:
            title = None
        img = a.select_one("img")
        # Fix: lazy-loaded images often keep the real URL in data-src.
        image_url = (img.get("src") or img.get("data-src")) if img else None
        # Price: look for nearby text containing a currency symbol.
        price_text = ""
        if card:
            pt = card.get_text(" ", strip=True)
            if any(sym in pt for sym in ["€", "$", "£"]):
                price_text = pt
        price = parse_price(price_text)
        # Vinted IDs are usually present in the URL path.
        m = re.search(r"/items/(\d+)", item_url)
        item_id = m.group(1) if m else None
        items.append({
            "id": item_id,
            "title": title,
            "price": price.get("amount"),
            "currency": price.get("currency"),
            "image_url": image_url,
            "item_url": item_url,
        })
    # De-dupe by item_url (one card can contain several anchors to the item).
    uniq: list[dict] = []
    seen: set[str] = set()
    for it in items:
        u = it.get("item_url")
        if not u or u in seen:
            continue
        seen.add(u)
        uniq.append(it)
    return uniq
This is intentionally a starter parser. Once you run it against your locale, inspect the HTML and tighten the selectors (for brand/size/condition/seller).
Step 3: Pagination
Vinted search pages usually support page= or cursor-based pagination.
We’ll implement page= first (the most common pattern). If your locale uses a cursor param, you can swap it in.
import urllib.parse
def set_page(url: str, page: int) -> str:
    """Return *url* with its ``page`` query parameter set to *page*.

    An existing ``page`` param is replaced in place; otherwise ``page`` is
    appended at the end of the query string.

    Fix: the original used ``dict(parse_qsl(...))``, which silently collapses
    repeated query parameters (e.g. ``catalog[]=1&catalog[]=2``). This version
    keeps the pair list so multi-valued params survive.
    """
    parts = urllib.parse.urlparse(url)
    pairs = urllib.parse.parse_qsl(parts.query, keep_blank_values=True)
    out: list[tuple[str, str]] = []
    replaced = False
    for k, v in pairs:
        if k == "page":
            if not replaced:
                out.append(("page", str(page)))
                replaced = True
            # drop any duplicate page params
        else:
            out.append((k, v))
    if not replaced:
        out.append(("page", str(page)))
    new_q = urllib.parse.urlencode(out)
    return urllib.parse.urlunparse((parts.scheme, parts.netloc, parts.path, parts.params, new_q, parts.fragment))
def crawl_search(start_url: str, pages: int = 5, use_proxiesapi: bool = True) -> list[dict]:
    """Crawl up to *pages* pages of a Vinted search and collect unique items.

    Args:
        start_url: URL of the first search-results page.
        pages: Maximum number of pages to fetch.
        use_proxiesapi: Route fetches through ProxiesAPI.

    Returns:
        Listing dicts (see parse_search_results), de-duplicated by item_url
        across all pages.
    """
    all_items: list[dict] = []
    seen: set[str] = set()
    # Fix: use the session as a context manager so pooled connections are
    # closed when the crawl finishes (or raises).
    with requests.Session() as s:
        for p in range(1, pages + 1):
            url = start_url if p == 1 else set_page(start_url, p)
            html = fetch_html(url, use_proxiesapi=use_proxiesapi, session=s)
            batch = parse_search_results(html)
            print(f"page {p}/{pages} -> {len(batch)} items")
            for it in batch:
                if it["item_url"] in seen:
                    continue
                seen.add(it["item_url"])
                all_items.append(it)
            # Polite jittered delay between pages.
            # Fix: skip the sleep after the final page — it only delayed return.
            if p < pages:
                time.sleep(1.0 + random.random())
    return all_items
Run it
import json

START = "https://www.vinted.com/catalog?search_text=nike%20air%20force"

# Crawl the first 10 result pages through the proxy-backed fetch layer.
items = crawl_search(START, pages=10, use_proxiesapi=True)
print("total", len(items))

# Serialize once, then write — keeps the file handle open only briefly.
payload = json.dumps(items, ensure_ascii=False, indent=2)
with open("vinted_items.json", "w", encoding="utf-8") as f:
    f.write(payload)
print("wrote vinted_items.json")
Export CSV (optional)
import csv

# Column order for the CSV export — mirrors the keys produced by the parser.
FIELDS = ["id", "title", "price", "currency", "image_url", "item_url"]

with open("vinted_items.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=FIELDS)
    writer.writeheader()
    for row in items:
        writer.writerow(row)
print("wrote vinted_items.csv")
Where ProxiesAPI fits (honestly)
On marketplace crawls you usually fail in one of three ways:
- requests start returning 429 / throttling
- requests start returning bot pages
- results silently degrade (you get HTML that isn’t the real listings)
ProxiesAPI helps with the IP-side of that problem by proxying your requests.
It won’t fix:
- broken selectors
- too-fast pagination
- JS-only rendering
But it will often make a big difference to long “page 1 → page 50” runs.
QA checklist
- Page 1 returns real listing cards
- Pagination increments and changes results
- You’re extracting stable item_url and image URLs
- CSV/JSON exports are valid
- You handle blocks (retries + backoff)
Marketplaces can throttle repeated requests from a single IP. ProxiesAPI lets you proxy your fetches so pagination runs are less likely to die mid-crawl.