bizmatch-project/bizmatch/src/robots.txt

139 lines
3.0 KiB
Plaintext

# robots.txt for BizMatch - Business Marketplace
# https://www.bizmatch.net
# Last updated: 2026-02-03
# ===========================================
# Default rules for all crawlers
# ===========================================
User-agent: *
# Everything is crawlable unless a Disallow below matches.
# Do NOT add path-specific Allow rules here (e.g. "Allow: /businessListings"
# or "Allow: /business/*"). Crawlers resolve Allow/Disallow conflicts by
# picking the rule with the longest path, so such rules would outrank the
# shorter query-parameter Disallow patterns at the end of this group and
# re-open paginated/sorted listing URLs for crawling.
Allow: /
# Disallow private/admin areas
Disallow: /admin/
Disallow: /account
Disallow: /myListings
Disallow: /myFavorites
Disallow: /createBusinessListing
Disallow: /createCommercialPropertyListing
Disallow: /editBusinessListing/*
Disallow: /editCommercialPropertyListing/*
Disallow: /login
Disallow: /logout
Disallow: /register
Disallow: /emailUs
# Disallow duplicate content / API routes
Disallow: /api/
Disallow: /bizmatch/
# Disallow Cloudflare internal paths (prevents 404 errors in crawl reports)
Disallow: /cdn-cgi/
# Disallow search result pages with sort/paging query parameters
# (avoids duplicate content). These patterns are 9-11 characters long and
# only win against "Allow: /" because that rule is shorter.
Disallow: /*?*sortBy=
Disallow: /*?*page=
Disallow: /*?*start=
# ===========================================
# Google-specific rules
# ===========================================
# Googlebot intentionally has NO dedicated group.
# A crawler obeys only the single most specific group that matches its name
# and ignores "User-agent: *" once such a group exists. A Googlebot group
# containing just "Allow: /" would therefore exempt Googlebot from every
# Disallow in the default group (admin, account, login, API, query-parameter
# duplicates, ...). Without a dedicated group, Googlebot follows the default
# rules. Crawl-delay is omitted because Googlebot does not support it.
# Allow Google to index images
# Image crawler group: application assets may be fetched, except the
# bundled Leaflet map assets.
# NOTE(review): because this named group exists, Googlebot-Image does not
# read the "User-agent: *" group; confirm that none of the default
# exclusions are needed for image crawling.
User-agent: Googlebot-Image
Allow: /assets/
# More specific (longer) than the Allow above, so it takes precedence
# for everything under /assets/leaflet/.
Disallow: /assets/leaflet/
# ===========================================
# Bing-specific rules
# ===========================================
User-agent: Bingbot
# A crawler that matches a named group ignores the "User-agent: *" group,
# so the default exclusions are repeated here. Keep this list in sync with
# the default group.
Allow: /
# Private/admin areas
Disallow: /admin/
Disallow: /account
Disallow: /myListings
Disallow: /myFavorites
Disallow: /createBusinessListing
Disallow: /createCommercialPropertyListing
Disallow: /editBusinessListing/*
Disallow: /editCommercialPropertyListing/*
Disallow: /login
Disallow: /logout
Disallow: /register
Disallow: /emailUs
# Duplicate content / API routes / Cloudflare internals
Disallow: /api/
Disallow: /bizmatch/
Disallow: /cdn-cgi/
# Sort/paging query parameters (duplicate content)
Disallow: /*?*sortBy=
Disallow: /*?*page=
Disallow: /*?*start=
# Throttle: the only Bing-specific setting in this group
Crawl-delay: 2
# ===========================================
# Other major search engines
# ===========================================
# Named groups replace (not extend) the "User-agent: *" group, so each group
# below repeats the default exclusions and differs only in Crawl-delay.
# Crawlers with the same delay share one group via stacked User-agent lines.
# Keep the Disallow lists in sync with the default group.
User-agent: DuckDuckBot
User-agent: Slurp
Allow: /
Disallow: /admin/
Disallow: /account
Disallow: /myListings
Disallow: /myFavorites
Disallow: /createBusinessListing
Disallow: /createCommercialPropertyListing
Disallow: /editBusinessListing/*
Disallow: /editCommercialPropertyListing/*
Disallow: /login
Disallow: /logout
Disallow: /register
Disallow: /emailUs
Disallow: /api/
Disallow: /bizmatch/
Disallow: /cdn-cgi/
Disallow: /*?*sortBy=
Disallow: /*?*page=
Disallow: /*?*start=
Crawl-delay: 2
# Slower crawl rate for Yandex and Baidu
User-agent: Yandex
User-agent: Baiduspider
Allow: /
Disallow: /admin/
Disallow: /account
Disallow: /myListings
Disallow: /myFavorites
Disallow: /createBusinessListing
Disallow: /createCommercialPropertyListing
Disallow: /editBusinessListing/*
Disallow: /editCommercialPropertyListing/*
Disallow: /login
Disallow: /logout
Disallow: /register
Disallow: /emailUs
Disallow: /api/
Disallow: /bizmatch/
Disallow: /cdn-cgi/
Disallow: /*?*sortBy=
Disallow: /*?*page=
Disallow: /*?*start=
Crawl-delay: 5
# ===========================================
# AI/LLM Crawlers (Answer Engine Optimization)
# ===========================================
# AI/LLM crawlers are explicitly welcome on all public pages. They share one
# group (stacked User-agent lines) so that every one of them gets the same
# exclusions; a named group replaces the "User-agent: *" group, so the
# default exclusions must be repeated here. Keep the list in sync with the
# default group.
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: Claude-Web
User-agent: Anthropic-AI
User-agent: PerplexityBot
User-agent: Cohere-ai
Allow: /
# Private/admin areas
Disallow: /admin/
Disallow: /account
Disallow: /myListings
Disallow: /myFavorites
Disallow: /createBusinessListing
Disallow: /createCommercialPropertyListing
Disallow: /editBusinessListing/*
Disallow: /editCommercialPropertyListing/*
Disallow: /login
Disallow: /logout
Disallow: /register
Disallow: /emailUs
# Duplicate content / API routes / Cloudflare internals
Disallow: /api/
Disallow: /bizmatch/
Disallow: /cdn-cgi/
# Sort/paging query parameters (duplicate content)
Disallow: /*?*sortBy=
Disallow: /*?*page=
Disallow: /*?*start=
# ===========================================
# Block unwanted bots
# ===========================================
# SEO/backlink crawlers that are denied the entire site. All of them share
# a single rule, so they are listed as one group.
User-agent: AhrefsBot
User-agent: SemrushBot
User-agent: MJ12bot
User-agent: DotBot
User-agent: BLEXBot
Disallow: /
# ===========================================
# Sitemap locations
# ===========================================
# Main sitemap index (absolute URL). A Sitemap line is not part of any
# User-agent group, so it is visible to every crawler that reads this file.
Sitemap: https://www.bizmatch.net/sitemap.xml
# ===========================================
# Host directive (preferred mirror; a Yandex-only extension)
# NOTE(review): Yandex announced in 2018 that it no longer uses Host and
# relies on 301 redirects instead; other crawlers never supported it.
# Confirm and consider removing this line.
# ===========================================
Host: https://www.bizmatch.net