# robots.txt for RAEL.ORG
# Purpose: Maximize visibility in AI search/answers, with explicit control over AI training crawlers.

############################
# 1) Global default
############################
User-agent: *
Allow: /

# Declare your sitemap(s) so compliant crawlers discover canonical URLs quickly.
Sitemap: https://www.rael.org/sitemap_index.xml

############################
# 2) OpenAI
############################
# OAI-SearchBot: used for ChatGPT search/answers (discovery & citations).
User-agent: OAI-SearchBot
Allow: /

# GPTBot: OpenAI’s general crawler (often used for model improvements/training).
# Set Allow if you consent to training; switch to Disallow to opt out.
User-agent: GPTBot
Allow: /

# ChatGPT-User: fetches on a user’s request via the ChatGPT UI.
User-agent: ChatGPT-User
Allow: /

############################
# 3) Anthropic (Claude)
############################
# Claude-SearchBot: indexing for Claude’s web answers.
User-agent: Claude-SearchBot
Allow: /

# Claude-User: per-user retrieval (when a Claude user opens your page).
User-agent: Claude-User
Allow: /

# ClaudeBot: Anthropic’s training crawler.
# Set Allow if you consent to training; switch to Disallow if you don’t.
User-agent: ClaudeBot
Allow: /

############################
# 4) Perplexity
############################
# PerplexityBot: indexer for Perplexity search results.
User-agent: PerplexityBot
Allow: /

# (FYI) Perplexity also has user-initiated fetching; robots.txt may not apply
# to all on-demand requests. Use server/IP controls if strict blocking is required.

############################
# 5) Google / Apple / Bing / Common Crawl
############################
# Google-Extended: controls use of your content in Google’s generative AI.
User-agent: Google-Extended
Allow: /

# Applebot-Extended: controls use in Apple’s AI features.
User-agent: Applebot-Extended
Allow: /

# Bing: primary crawler (feeds Bing + Copilot).
User-agent: bingbot
Allow: /

# Common Crawl: widely reused in research/AI datasets.
User-agent: CCBot
Allow: /

############################
# 6) Optional: restrict sensitive or utility paths (uncomment to use)
############################
# User-agent: *
# Disallow: /wp-admin/
# Disallow: /cart/
# Disallow: /checkout/
# Disallow: /thank-you/
# Disallow: /private/

############################
# Notes:
# - Keep this file at exactly: https://www.rael.org/robots.txt
# - Update Sitemap lines if you add localized or section sitemaps.
# - For a strict “no training” stance, change the GPTBot/ClaudeBot/Google-Extended/
#   Applebot-Extended/CCBot blocks to "Disallow: /".
############################