disallow AI bots
This commit is contained in:
		
							parent
							
								
									dde7921b0e
								
							
						
					
					
						commit
						55a4e5dec2
					
				| @ -1,8 +1,58 @@ | ||||
| User-agent: * | ||||
| Allow: / | ||||
| 
 | ||||
| User-agent: GPTBot | ||||
| # Block all known AI crawlers and assistants | ||||
| # from using content for training AI models. | ||||
| # Source: https://robotstxt.com/ai | ||||
| User-Agent: GPTBot | ||||
| User-Agent: ClaudeBot | ||||
| User-Agent: Claude-Web | ||||
| User-Agent: CCBot | ||||
| User-Agent: Googlebot-Extended | ||||
| User-Agent: Applebot-Extended | ||||
| User-Agent: Facebookbot | ||||
| User-Agent: Meta-ExternalAgent | ||||
| User-Agent: Meta-ExternalFetcher | ||||
| User-Agent: diffbot | ||||
| User-Agent: PerplexityBot | ||||
| User-Agent: Omgili | ||||
| User-Agent: Omgilibot | ||||
| User-Agent: webzio-extended | ||||
| User-Agent: ImagesiftBot | ||||
| User-Agent: Bytespider | ||||
| User-Agent: Amazonbot | ||||
| User-Agent: Youbot | ||||
| User-Agent: SemrushBot-OCOB | ||||
| User-Agent: Petalbot | ||||
| User-Agent: VelenPublicWebCrawler | ||||
| User-Agent: TurnitinBot | ||||
| User-Agent: Timpibot | ||||
| User-Agent: OAI-SearchBot | ||||
| User-Agent: ICC-Crawler | ||||
| User-Agent: AI2Bot | ||||
| User-Agent: AI2Bot-Dolma | ||||
| User-Agent: DataForSeoBot | ||||
| User-Agent: AwarioBot | ||||
| User-Agent: AwarioSmartBot | ||||
| User-Agent: AwarioRssBot | ||||
| User-Agent: Google-CloudVertexBot | ||||
| User-Agent: PanguBot | ||||
| User-Agent: Kangaroo Bot | ||||
| User-Agent: Sentibot | ||||
| User-Agent: img2dataset | ||||
| User-Agent: Meltwater | ||||
| User-Agent: Seekr | ||||
| User-Agent: peer39_crawler | ||||
| User-Agent: cohere-ai | ||||
| User-Agent: cohere-training-data-crawler | ||||
| User-Agent: DuckAssistBot | ||||
| User-Agent: Scrapy | ||||
| Disallow: / | ||||
| DisallowAITraining: / | ||||
| 
 | ||||
| User-agent: Applebot-Extended | ||||
| Disallow: / | ||||
| # Block any non-specified AI crawlers (e.g., new | ||||
| # or unknown bots) from using content for training | ||||
| # AI models.  This directive is still experimental | ||||
| # and may not be supported by all AI crawlers. | ||||
| User-Agent: * | ||||
| DisallowAITraining: / | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user