# As a condition of accessing this website, you agree to abide by the following
# content signals:

# (a)  If a Content-Signal = yes, you may collect content for the corresponding
#      use.
# (b)  If a Content-Signal = no, you may not collect content for the
#      corresponding use.
# (c)  If the website operator does not include a Content-Signal for a
#      corresponding use, the website operator neither grants nor restricts
#      permission via Content-Signal with respect to the corresponding use.

# The content signals and their meanings are:

# search:   building a search index and providing search results (e.g., returning
#           hyperlinks and short excerpts from your website's contents). Search does not
#           include providing AI-generated search summaries.
# ai-input: inputting content into one or more AI models (e.g., retrieval
#           augmented generation, grounding, or other real-time taking of content for
#           generative AI search answers).
# ai-train: training or fine-tuning AI models.

# ANY RESTRICTIONS EXPRESSED VIA CONTENT SIGNALS ARE EXPRESS RESERVATIONS OF
# RIGHTS UNDER ARTICLE 4 OF THE EUROPEAN UNION DIRECTIVE 2019/790 ON COPYRIGHT
# AND RELATED RIGHTS IN THE DIGITAL SINGLE MARKET.

# BEGIN Cloudflare Managed content

User-agent: *
Content-Signal: search=yes,ai-train=no
Allow: /

User-agent: Amazonbot
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: CloudflareBrowserRenderingCrawler
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: GPTBot
Disallow: /

User-agent: meta-externalagent
Disallow: /

# END Cloudflare Managed Content

# =============================================================================
# ISKCON Astara Tarakeswar — robots.txt
# Production-grade crawler directives for Google, Bing, AI search & social
# =============================================================================

# ---------- Global rules ----------
# Fallback group: applies only to crawlers that are NOT named in a more specific
# "User-agent:" group elsewhere in this file. Named crawlers do not inherit these rules.
User-agent: *
Allow: /
Disallow: /admin/
Disallow: /api/
Disallow: /*.json$
Disallow: /sw.js
Disallow: /assets/_new_image_backup/
# Crawl-delay hint (Cloudflare Pages handles throttling automatically; kept for legacy bots).
# Kept inside the group, with no blank line before it, so parsers that end a
# group at a blank line still attach it to "User-agent: *". Google ignores Crawl-delay.
Crawl-delay: 1

# ---------- Sitemaps ----------
# Sitemap lines are independent of User-agent groups and are visible to every
# crawler that reads this file. Each URL must be absolute.
# NOTE(review): both sitemap.xml and sitemap-index.xml are listed — confirm both
# exist and that one does not simply duplicate the other.
Sitemap: https://iskconastaratarakeswar.com/sitemap.xml
Sitemap: https://iskconastaratarakeswar.com/sitemap-index.xml
Sitemap: https://iskconastaratarakeswar.com/image-sitemap.xml
Sitemap: https://iskconastaratarakeswar.com/video-sitemap.xml
Sitemap: https://iskconastaratarakeswar.com/news-sitemap.xml
# RSS feed submitted as an additional sitemap source.
Sitemap: https://iskconastaratarakeswar.com/rss.xml

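# =============================================================================
# AI / LLM Crawler Rules (ChatGPT, Perplexity, Claude, Gemini, Copilot, etc.)
# We allow training + retrieval for explicit AI crawlers; this is intended to
# boost citation in AI Overviews, ChatGPT browse, Perplexity, Copilot, Claude,
# and Gemini.
# NOTE(review): the Cloudflare-managed section at the top of this file declares
# "Content-Signal: ... ai-train=no" and sets "Disallow: /" for Amazonbot,
# Applebot-Extended, CCBot, ClaudeBot, Google-Extended, GPTBot, and
# meta-externalagent. Those bots therefore receive two contradictory groups.
# Decide which policy is intended and align the two sections.
# =============================================================================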

# OpenAI / ChatGPT
# GPTBot, ChatGPT-User, and OAI-SearchBot share a single rule set:
# the whole site is crawlable except the admin area and the API endpoints.
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: OAI-SearchBot
Allow: /
Disallow: /admin/
Disallow: /api/

# Anthropic / Claude
# All four Anthropic user-agent tokens share a single rule set:
# the whole site is crawlable except the admin area and the API endpoints.
User-agent: ClaudeBot
User-agent: Claude-Web
User-agent: Claude-SearchBot
User-agent: anthropic-ai
Allow: /
Disallow: /admin/
Disallow: /api/

# Google AI (Gemini, AI Overviews)
# A named group replaces the "User-agent: *" group for that crawler, so the
# admin/API exclusions are repeated here; "Allow: /" alone would expose them.
User-agent: Google-Extended
Allow: /
Disallow: /admin/
Disallow: /api/

User-agent: GoogleOther
Allow: /
Disallow: /admin/
Disallow: /api/

# Perplexity
# Both Perplexity agents get the same exclusions; a named group does not
# inherit the "User-agent: *" Disallow rules.
User-agent: PerplexityBot
Allow: /
Disallow: /admin/
Disallow: /api/

User-agent: Perplexity-User
Allow: /
Disallow: /admin/
Disallow: /api/

# Microsoft Copilot / Bing
# These crawlers match their own group, not "User-agent: *", so the admin/API
# exclusions are repeated here.
# NOTE(review): the Crawl-delay set for "User-agent: *" does not carry over to
# these groups; add one here if throttling Bing is desired.
User-agent: Bingbot
Allow: /
Disallow: /admin/
Disallow: /api/

User-agent: MSNBot-Media
Allow: /
Disallow: /admin/
Disallow: /api/

# Named groups do not inherit the "User-agent: *" Disallow rules, so the
# admin/API exclusions are repeated in each group below.

# Apple Intelligence
User-agent: Applebot-Extended
Allow: /
Disallow: /admin/
Disallow: /api/

# Meta AI
User-agent: Meta-ExternalAgent
Allow: /
Disallow: /admin/
Disallow: /api/

User-agent: FacebookBot
Allow: /
Disallow: /admin/
Disallow: /api/

# Amazon ( Rufus / Alexa )
User-agent: Amazonbot
Allow: /
Disallow: /admin/
Disallow: /api/

# Named groups do not inherit the "User-agent: *" Disallow rules, so the
# admin/API exclusions are repeated in each group below.

# DuckDuckGo
User-agent: DuckDuckBot
Allow: /
Disallow: /admin/
Disallow: /api/

# Common Crawl (feeds most open AI corpora)
User-agent: CCBot
Allow: /
Disallow: /admin/
Disallow: /api/

# Cohere
User-agent: cohere-ai
Allow: /
Disallow: /admin/
Disallow: /api/

# Mistral / Le Chat
User-agent: MistralAI-User
Allow: /
Disallow: /admin/
Disallow: /api/

# You.com
User-agent: YouBot
Allow: /
Disallow: /admin/
Disallow: /api/

# =============================================================================
# Major search engines — explicit per-crawler groups
# =============================================================================
# A crawler obeys only the most specific group that names it, so none of the
# "User-agent: *" Disallow rules apply to the crawlers below unless repeated here.
# NOTE(review): "/*.json$" and "/sw.js" from the "*" group are deliberately not
# copied, since blocking them may affect page rendering; add them if intended.
User-agent: Googlebot
Allow: /
Disallow: /admin/
Disallow: /api/
Disallow: /assets/_new_image_backup/

# "Allow: /assets/" on its own restricts nothing (everything is allowed by
# default), so the backup image folder is excluded explicitly.
User-agent: Googlebot-Image
Allow: /assets/
Disallow: /admin/
Disallow: /api/
Disallow: /assets/_new_image_backup/

User-agent: Googlebot-News
Allow: /blog/
Allow: /festivals/
Disallow: /admin/
Disallow: /api/

User-agent: Googlebot-Video
Allow: /gallery/
Disallow: /admin/
Disallow: /api/

User-agent: Mediapartners-Google
Allow: /
Disallow: /admin/
Disallow: /api/

# =============================================================================
# Social media crawlers — required for link previews to render
# =============================================================================
# Link-preview fetchers share one unrestricted group: each may request any URL.
User-agent: Twitterbot
User-agent: LinkedInBot
User-agent: Slackbot
User-agent: TelegramBot
User-agent: WhatsApp
User-agent: Pinterestbot
Allow: /