139 lines
3.0 KiB
Plaintext
139 lines
3.0 KiB
Plaintext
# robots.txt for BizMatch - Business Marketplace
|
|
# https://www.bizmatch.net
|
|
# Last updated: 2026-02-03
|
|
|
|
# ===========================================
|
|
# Default rules for all crawlers
|
|
# ===========================================
|
|
User-agent: *
#
# Default policy for every crawler that has no dedicated group in this file.
# Everything is crawlable unless a Disallow rule below matches.
#
# Do NOT add explicit Allow lines for public pages (e.g. /businessListings,
# /business/*). Rule precedence is decided by the longest matching path, so
# "Allow: /businessListings" (17 chars) would beat "Disallow: /*?*sortBy="
# (11 chars) and re-open exactly the parameterised listing URLs that the
# rules at the end of this group are meant to block. "Allow: /" is only
# one character long and therefore never overrides a Disallow.
Allow: /
#
# Disallow private/admin areas
Disallow: /admin/
Disallow: /account
Disallow: /myListings
Disallow: /myFavorites
Disallow: /createBusinessListing
Disallow: /createCommercialPropertyListing
Disallow: /editBusinessListing/*
Disallow: /editCommercialPropertyListing/*
Disallow: /login
Disallow: /logout
Disallow: /register
Disallow: /emailUs
#
# Disallow duplicate content / API routes
Disallow: /api/
Disallow: /bizmatch/
#
# Disallow Cloudflare internal paths (prevents 404 errors in crawl reports)
Disallow: /cdn-cgi/
#
# Disallow search result pages with parameters (to avoid duplicate content)
Disallow: /*?*sortBy=
Disallow: /*?*page=
Disallow: /*?*start=
|
|
|
|
# ===========================================
|
|
# Google-specific rules
|
|
# ===========================================
|
|
User-agent: Googlebot
#
# A crawler obeys only the most specific group that names it, so Googlebot
# does NOT inherit anything from the "User-agent: *" group. Every restriction
# that should apply to Googlebot has to be repeated here.
# Crawl-delay is intentionally omitted: Googlebot does not support it.
Allow: /
#
# Private/admin areas
Disallow: /admin/
Disallow: /account
Disallow: /myListings
Disallow: /myFavorites
Disallow: /createBusinessListing
Disallow: /createCommercialPropertyListing
Disallow: /editBusinessListing/*
Disallow: /editCommercialPropertyListing/*
Disallow: /login
Disallow: /logout
Disallow: /register
Disallow: /emailUs
#
# Duplicate content / API routes
Disallow: /api/
Disallow: /bizmatch/
#
# Cloudflare internal paths
Disallow: /cdn-cgi/
#
# Parameterised search result pages
Disallow: /*?*sortBy=
Disallow: /*?*page=
Disallow: /*?*start=
|
|
|
|
# Google's image crawler: the /assets/ tree may be fetched, except for the
# /assets/leaflet/ subtree. The longer Disallow path takes precedence over
# the shorter Allow path, regardless of the order of the two lines.
User-agent: Googlebot-Image
Disallow: /assets/leaflet/
Allow: /assets/
|
|
|
|
# ===========================================
|
|
# Bing-specific rules
|
|
# ===========================================
|
|
User-agent: Bingbot
#
# A crawler obeys only the most specific group that names it, so Bingbot
# does NOT inherit anything from the "User-agent: *" group. Every restriction
# that should apply to Bingbot has to be repeated here.
Allow: /
# Ask for at most one request every 2 seconds.
Crawl-delay: 2
#
# Private/admin areas
Disallow: /admin/
Disallow: /account
Disallow: /myListings
Disallow: /myFavorites
Disallow: /createBusinessListing
Disallow: /createCommercialPropertyListing
Disallow: /editBusinessListing/*
Disallow: /editCommercialPropertyListing/*
Disallow: /login
Disallow: /logout
Disallow: /register
Disallow: /emailUs
#
# Duplicate content / API routes
Disallow: /api/
Disallow: /bizmatch/
#
# Cloudflare internal paths
Disallow: /cdn-cgi/
#
# Parameterised search result pages
Disallow: /*?*sortBy=
Disallow: /*?*page=
Disallow: /*?*start=
|
|
|
|
# ===========================================
|
|
# Other major search engines
|
|
# ===========================================
|
|
# Each crawler below obeys only its own group and inherits nothing from the
# "User-agent: *" group, so the full restriction list is repeated per record.
# The records differ only in their Crawl-delay value. They are kept separate
# (rather than sharing one multi-agent record) for maximum parser
# compatibility.

User-agent: DuckDuckBot
Allow: /
Crawl-delay: 2
Disallow: /admin/
Disallow: /account
Disallow: /myListings
Disallow: /myFavorites
Disallow: /createBusinessListing
Disallow: /createCommercialPropertyListing
Disallow: /editBusinessListing/*
Disallow: /editCommercialPropertyListing/*
Disallow: /login
Disallow: /logout
Disallow: /register
Disallow: /emailUs
Disallow: /api/
Disallow: /bizmatch/
Disallow: /cdn-cgi/
Disallow: /*?*sortBy=
Disallow: /*?*page=
Disallow: /*?*start=

User-agent: Slurp
Allow: /
Crawl-delay: 2
Disallow: /admin/
Disallow: /account
Disallow: /myListings
Disallow: /myFavorites
Disallow: /createBusinessListing
Disallow: /createCommercialPropertyListing
Disallow: /editBusinessListing/*
Disallow: /editCommercialPropertyListing/*
Disallow: /login
Disallow: /logout
Disallow: /register
Disallow: /emailUs
Disallow: /api/
Disallow: /bizmatch/
Disallow: /cdn-cgi/
Disallow: /*?*sortBy=
Disallow: /*?*page=
Disallow: /*?*start=

User-agent: Yandex
Allow: /
Crawl-delay: 5
Disallow: /admin/
Disallow: /account
Disallow: /myListings
Disallow: /myFavorites
Disallow: /createBusinessListing
Disallow: /createCommercialPropertyListing
Disallow: /editBusinessListing/*
Disallow: /editCommercialPropertyListing/*
Disallow: /login
Disallow: /logout
Disallow: /register
Disallow: /emailUs
Disallow: /api/
Disallow: /bizmatch/
Disallow: /cdn-cgi/
Disallow: /*?*sortBy=
Disallow: /*?*page=
Disallow: /*?*start=

User-agent: Baiduspider
Allow: /
Crawl-delay: 5
Disallow: /admin/
Disallow: /account
Disallow: /myListings
Disallow: /myFavorites
Disallow: /createBusinessListing
Disallow: /createCommercialPropertyListing
Disallow: /editBusinessListing/*
Disallow: /editCommercialPropertyListing/*
Disallow: /login
Disallow: /logout
Disallow: /register
Disallow: /emailUs
Disallow: /api/
Disallow: /bizmatch/
Disallow: /cdn-cgi/
Disallow: /*?*sortBy=
Disallow: /*?*page=
Disallow: /*?*start=
|
|
|
|
# ===========================================
|
|
# AI/LLM Crawlers (Answer Engine Optimization)
|
|
# ===========================================
|
|
# One shared record for all AI/LLM crawlers: consecutive User-agent lines
# followed by rules form a single group that applies to each listed agent.
#
# A crawler that matches a named group ignores the "User-agent: *" group
# entirely, so the private-area, API and parameter restrictions must be
# repeated here; a bare "Allow: /" would open the whole site to these bots.
# Explicit Allow lines for listing pages are deliberately absent: being
# longer than the parameter Disallow patterns, they would override them.
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: Claude-Web
User-agent: Anthropic-AI
User-agent: PerplexityBot
User-agent: Cohere-ai
Allow: /
#
# Private/admin areas
Disallow: /admin/
Disallow: /account
Disallow: /myListings
Disallow: /myFavorites
Disallow: /createBusinessListing
Disallow: /createCommercialPropertyListing
Disallow: /editBusinessListing/*
Disallow: /editCommercialPropertyListing/*
Disallow: /login
Disallow: /logout
Disallow: /register
Disallow: /emailUs
#
# Duplicate content / API routes
Disallow: /api/
Disallow: /bizmatch/
#
# Cloudflare internal paths
Disallow: /cdn-cgi/
#
# Parameterised search result pages
Disallow: /*?*sortBy=
Disallow: /*?*page=
Disallow: /*?*start=
|
|
|
|
# ===========================================
|
|
# Block unwanted bots
|
|
# ===========================================
|
|
# Each record below denies one named crawler access to the entire site.
# Records are listed alphabetically; their order has no effect on matching.

User-agent: AhrefsBot
Disallow: /

User-agent: BLEXBot
Disallow: /

User-agent: DotBot
Disallow: /

User-agent: MJ12bot
Disallow: /

User-agent: SemrushBot
Disallow: /
|
|
|
|
# ===========================================
|
|
# Sitemap locations
|
|
# ===========================================
|
|
# Main sitemap index
# The Sitemap line is given as a fully qualified URL and is not tied to any
# User-agent group, so it is visible to every crawler reading this file.
|
|
Sitemap: https://www.bizmatch.net/sitemap.xml
|
|
|
|
# ===========================================
|
|
# Host directive (for Yandex)
# NOTE(review): Host is a non-standard directive (not part of RFC 9309);
# crawlers that do not recognise it ignore the line. Yandex reportedly
# stopped honouring it in 2018 in favour of 301 redirects - confirm whether
# this line is still needed.
|
|
# ===========================================
|
|
Host: https://www.bizmatch.net
|