# Define access restrictions for robots/spiders
# http://www.robotstxt.org/wc/norobots.html

# By default we allow robots to access all areas of our site
# already accessible to anonymous users
User-agent: *
Disallow: /Members/
Disallow: /training/
Disallow: /uol/
Disallow: /homepage-content/

# Add Googlebot-specific syntax extensions to exclude forms
# that are repeated for each piece of content in the site;
# the wildcard syntax is only supported by Googlebot
# http://www.google.com/support/webmasters/bin/answer.py?answer=40367&ctx=sibling
User-agent: Googlebot
Disallow: /*sendto_form$
Disallow: /*folder_factories$
Disallow: /@@*/
Disallow: /*/@@*/
Disallow: /Members/asa8/
Disallow: /offices/ssds/sd/vacancies/RSS/
Disallow: /acl_users/

# yacybot was being impolite, making 15 reqs/sec
User-agent: yacybot
Disallow: /

# http://www.80legs.com/webcrawler.html hit 64 reqs/sec
User-agent: 008
Disallow: /

# yandex was similarly impolite; throttle it instead of blocking
# (Crawl-delay is the minimum number of seconds between requests)
User-agent: Yandex
Crawl-delay: 2

User-agent: AhrefsBot
Crawl-delay: 10

# Prevent our own GSA (Google Search Appliance) from crawling -
# it should rely on the feed instead
User-agent: gsa-crawler-uol-its
Disallow: /