From 359512d5890a741be075eb8b2d097130641aa09b Mon Sep 17 00:00:00 2001 From: Dave Jansen Date: Thu, 2 May 2024 11:47:59 +0900 Subject: [PATCH] Updated robots.txt and added ai.txt --- ai.txt | 6 ++++++ robots.txt | 55 ++++++++++++++++++++++++++---------------------------- 2 files changed, 32 insertions(+), 29 deletions(-) create mode 100644 ai.txt diff --git a/ai.txt b/ai.txt new file mode 100644 index 0000000..9df92b0 --- /dev/null +++ b/ai.txt @@ -0,0 +1,6 @@ +# Spawning AI +# Prevent datasets from using the following file types + +User-Agent: * +Disallow: / +Disallow: * diff --git a/robots.txt b/robots.txt index 6a4ea19..c05d314 100644 --- a/robots.txt +++ b/robots.txt @@ -2,37 +2,34 @@ User-agent: * Disallow: /ghost -# Used for many other (non-commercial) purposes as well +# (Attempt to) block a bunch of AI bots +# Kindly borrowed from: +# https://github.com/ai-robots-txt/ai.robots.txt/blob/main/robots.txt +User-agent: AdsBot-Google +User-agent: Amazonbot +User-agent: anthropic-ai +User-agent: Applebot +User-agent: AwarioRssBot +User-agent: AwarioSmartBot +User-agent: Bytespider User-agent: CCBot -Disallow: / - -# For new training only -User-agent: GPTBot -Disallow: / - -# Not for training, only for user requests User-agent: ChatGPT-User -Disallow: / - -# Marker for disabling Bard and Vertex AI -User-agent: Google-Extended -Disallow: / - -# Speech synthesis only? +User-agent: ClaudeBot +User-agent: Claude-Web +User-agent: cohere-ai +User-agent: DataForSeoBot User-agent: FacebookBot -Disallow: / - -# Multi-purpose, commercial uses; including LLMs -User-agent: Omgilibot -Disallow: / - -# Block imagesift.com bot, from thehive.ai -User-Agent: ImagesiftBot -Disallow: / - +User-agent: Google-Extended +User-agent: GoogleOther +User-agent: GPTBot +User-agent: ImagesiftBot +User-agent: magpie-crawler +User-agent: Meltwater +User-agent: omgili +User-agent: omgilibot +User-agent: peer39_crawler +User-agent: peer39_crawler/1.0 User-agent: PerplexityBot +User-agent: Seekr +User-agent: YouBot Disallow: / -User-Agent: anthropic-ai -Disallow: / -User-Agent: cohere-ai -Disallow: / \ No newline at end of file