Updated robots.txt and added ai.txt

This commit is contained in:
2024-05-02 11:47:59 +09:00
parent 23452a8585
commit 359512d589
2 changed files with 32 additions and 29 deletions

6
ai.txt Normal file
View File

@@ -0,0 +1,6 @@
# Spawning AI
# Prevent datasets from using any content on this site (all paths and all file types)
User-Agent: *
Disallow: /
Disallow: *

View File

@@ -2,37 +2,34 @@
User-agent: * User-agent: *
Disallow: /ghost Disallow: /ghost
# Used for many other (non-commercial) purposes as well # (Attempt to) block a bunch of AI bots
# Kindly borrowed from:
# https://github.com/ai-robots-txt/ai.robots.txt/blob/main/robots.txt
User-agent: AdsBot-Google
User-agent: Amazonbot
User-agent: anthropic-ai
User-agent: Applebot
User-agent: AwarioRssBot
User-agent: AwarioSmartBot
User-agent: Bytespider
User-agent: CCBot User-agent: CCBot
Disallow: /
# For new training only
User-agent: GPTBot
Disallow: /
# Not for training, only for user requests
User-agent: ChatGPT-User User-agent: ChatGPT-User
Disallow: / User-agent: ClaudeBot
User-agent: Claude-Web
# Marker for disabling Bard and Vertex AI User-agent: cohere-ai
User-agent: Google-Extended User-agent: DataForSeoBot
Disallow: /
# Speech synthesis only?
User-agent: FacebookBot User-agent: FacebookBot
Disallow: / User-agent: Google-Extended
User-agent: GoogleOther
# Multi-purpose, commercial uses; including LLMs User-agent: GPTBot
User-agent: Omgilibot User-agent: ImagesiftBot
Disallow: / User-agent: magpie-crawler
User-agent: Meltwater
# Block imagesift.com bot, from thehive.ai User-agent: omgili
User-Agent: ImagesiftBot User-agent: omgilibot
Disallow: / User-agent: peer39_crawler
User-agent: peer39_crawler/1.0
User-agent: PerplexityBot User-agent: PerplexityBot
User-agent: Seekr
User-agent: YouBot
Disallow: / Disallow: /
User-Agent: anthropic-ai
Disallow: /
User-Agent: cohere-ai
Disallow: /