# You can paste any of these blocks into robots.txt or a firewall rule.
# Grouped by company for readability.
# Note: user-agent matching in robots.txt is case-insensitive (RFC 9309).

# ——— OPENAI ———
# Search crawler: surfaces your pages as links inside ChatGPT search. Not used for model training.
User-agent: OAI-SearchBot
Allow: /

# User-driven browsing from ChatGPT and Custom GPTs. Fetches only after a human action.
User-agent: ChatGPT-User
User-agent: ChatGPT-User/2.0
Allow: /

# Model-training crawler. Disallow if you don't want content used to train models such as GPT-4o or GPT-5.
User-agent: GPTBot
Disallow: /   # blocks everything; narrow to e.g. "Disallow: /private/" to block only a folder
# Allow: /   # alternative: allow training on everything

# ——— ANTHROPIC (Claude) ———
User-agent: anthropic-ai   # legacy token for bulk model training
Disallow: /

User-agent: ClaudeBot      # Anthropic's main web crawler
User-agent: claude-web     # legacy web-focused crawl token
Allow: /

# ——— PERPLEXITY ———
User-agent: PerplexityBot    # search-index builder
Allow: /

User-agent: Perplexity-User  # human-triggered visit
Allow: /

# ——— GOOGLE (Gemini) ———
User-agent: Google-Extended
Disallow: /

# ——— MICROSOFT (Bing / Copilot) ———
User-agent: BingBot
Allow: /

# ——— AMAZON ———
User-agent: Amazonbot
Disallow: /

# ——— APPLE ———
User-agent: Applebot
User-agent: Applebot-Extended
Allow: /

# ——— META ———
User-agent: FacebookBot
User-agent: meta-externalagent
Allow: /

# ——— LINKEDIN ———
User-agent: LinkedInBot
Allow: /

# ——— BYTEDANCE ———
User-agent: Bytespider
Disallow: /

# ——— DUCKDUCKGO ———
User-agent: DuckAssistBot
Allow: /

# ——— COHERE ———
User-agent: cohere-ai
Allow: /

# ——— ALLEN INSTITUTE / COMMON CRAWL / OTHER RESEARCH ———
User-agent: AI2Bot
User-agent: CCBot
User-agent: Diffbot
User-agent: omgili
Disallow: /

# ——— EMERGING SEARCH START-UPS ———
User-agent: TimpiBot
User-agent: YouBot
Disallow: /

Sitemap: https://handbookgermany.de/sitemap_cms.xml