# robots.txt for pro.makeup
# Maintained in repo (overrides Cloudflare's Managed robots.txt).
# Policy: friendly to verified search/AI agents; internal APIs are off-limits;
# heavy training-only crawlers are tier-blocked at the UA level so they can't
# crowd out the live agents we actually want.

# Default group: applies to every crawler that does not match a more specific
# User-agent group elsewhere in this file. Everything is crawlable except the
# internal surfaces listed here. Paths are prefix matches, so "/dashboard"
# (no trailing slash) also covers "/dashboard/..." and "/dashboard?...".
# The Disallow rules come before the catch-all Allow so that legacy
# first-match parsers reach the same verdict as RFC 9309 longest-match parsers.
User-agent: *
Disallow: /api/
Disallow: /dashboard
Disallow: /auth/
Allow: /

# --- Explicit per-UA tier differentiation -----------------------------------
# Live-search / per-query AI agents: always allowed. These are the crawlers
# that fetch on demand when a user asks a question, so blocking them blocks
# real users.
# One shared group for all live-search / per-query agents. A crawler that
# matches a named group ignores the "User-agent: *" group entirely (RFC 9309),
# so the internal-path exclusions and the Content-Signal are repeated here;
# with a bare "Allow: /" these agents would be permitted to fetch /api/,
# /dashboard and /auth/. Keep these rules in sync with the "*" group.
# Claude-Web is a legacy token kept for older clients; Claude-User and
# Claude-SearchBot are the tokens Anthropic currently documents for
# user-initiated fetches and search indexing.
User-agent: ChatGPT-User
User-agent: OAI-SearchBot
User-agent: PerplexityBot
User-agent: Perplexity-User
User-agent: Claude-Web
User-agent: Claude-User
User-agent: Claude-SearchBot
Content-Signal: search=yes, ai-train=yes, ai-input=yes
Disallow: /api/
Disallow: /dashboard
Disallow: /auth/
Allow: /

# Search-engine crawlers: always allowed.
# One shared group for the search engines. Because a crawler that matches a
# named group does not fall back to "User-agent: *" (RFC 9309), the
# internal-path exclusions and the Content-Signal are repeated here; a bare
# "Allow: /" would let these crawlers index /api/, /dashboard and /auth/.
# Keep these rules in sync with the "*" group.
User-agent: Googlebot
User-agent: Bingbot
User-agent: Applebot
Content-Signal: search=yes, ai-train=yes, ai-input=yes
Disallow: /api/
Disallow: /dashboard
Disallow: /auth/
Allow: /

# Bulk crawlers (AI-training scrapers and SEO/backlink indexers): blocked.
# Their content doesn't reach end-users in real time, and their fetch volumes
# are large enough to deserve their own bucket. We retain ai-train=yes in
# Content-Signal below so policy-respecting trainers can still opt in via
# legitimate channels (e.g. licensed corpus).
# Single shared group: every token listed here receives the same site-wide
# Disallow, which is equivalent to declaring one group per token.
User-agent: CCBot
User-agent: Bytespider
User-agent: MJ12bot
User-agent: SemrushBot
User-agent: AhrefsBot
User-agent: DataForSeoBot
Disallow: /

# Cloudflare Content Signals (contentsignals.org policy hints for AI use
# cases; not an IETF RFC). We grant all three: search indexing (search),
# AI grounding/RAG (ai-input), and AI training (ai-train).
# A Content-Signal applies to the User-agent group it appears in, so it is
# anchored to "User-agent: *" here. Without a group line of its own it would
# trail the last blocked-crawler group instead of stating the site default.
# RFC 9309 parsers combine this group with the other "User-agent: *" rules.
# Update this line if business policy changes.
User-agent: *
Content-Signal: search=yes, ai-train=yes, ai-input=yes

# NLWeb Schema Feeds — points at the Schema Map XML enumerating our
# structured data feeds (JSON-LD endpoints, OpenAPI, MCP card).
Schemamap: https://pro.makeup/schemamap.xml

# XML sitemap location, for crawlers that discover sitemaps via robots.txt.
Sitemap: https://pro.makeup/sitemap.xml