From 94a535977aa88102cc32c15f8e05fbc292371003 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Wed, 22 Feb 2023 08:23:55 +0000 Subject: [PATCH] Simplify robots.txt --- static/robots.txt | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/static/robots.txt b/static/robots.txt index 3628172896..8af963213a 100644 --- a/static/robots.txt +++ b/static/robots.txt @@ -1,4 +1,4 @@ -# Generally, block infinite traps; +# Generally, block infinite traps, and avoid archived copies of websites interfering with live sites' search presence: User-agent: * Disallow: /wayback/ Disallow: /ukwa/search @@ -7,15 +7,13 @@ Disallow: /cy/ukwa/search Disallow: /gd/ukwa/search Disallow: /datasets/ Disallow: /shine/search +# Allow search engines to index specific sites: +# As requested in https://github.com/ukwa/ukwa-services/issues/96 +Allow: /wayback/archive/*/http://www.europeandialogue.org/ # Allow Twitterbot so social cards work: User-agent: Twitterbot Allow: /wayback/ User-agent: facebookexternalhit -Allow: /wayback/ - -# Allow search engines to index specific sites: -User-agent: * -# As requested in https://github.com/ukwa/ukwa-services/issues/96 -Allow: /wayback/archive/*/http://www.europeandialogue.org/ +Allow: /wayback/ \ No newline at end of file