By using a reverse proxy, it is possible to create an HTTP mirror without the heavy burden of rsync and a large amount of allocated space. The reverse proxy caches the individual files as they are requested (with special cases to exclude repository metadata files, which must always be fetched fresh).
Some practical applications might include:
Note: The advantages of this proxy won't be seen with only one or two hosts, except in cases where they are being reinstalled constantly.
Create /etc/varnish/public-mirror.vcl and edit the startup settings for Varnish (on Red Hat it's /etc/sysconfig/varnish, on Gentoo it's /etc/conf.d/varnishd, …). This particular configuration uses http://mirrors.kernel.org (in the US), as it is very reliable and is usually a Tier 1 or Tier 2 mirror for most projects. That is not to say it couldn't be used with another source, such as http://jailtime.org and its many mirrors.
This configuration has been tested and is currently in use with minor OS tweaks for performance.
// public-mirror.vcl
//
// Caching reverse proxy in front of mirrors.kernel.org. Package files are
// cached; repository index files (apt and yum metadata) are always passed
// through to the backend so clients never see a stale index.
//
// NOTE(review): the syntax here is Varnish 2.0.x (obj.* in vcl_fetch, bare
// "pass;", "error ..."). Confirm against the installed Varnish version.

// First mirrors.kernel.org address. The IP is pinned instead of the hostname;
// the hostname is supplied via the Host header in the probe and in vcl_recv.
backend kernelorg_1 {
    #.host = "mirrors.kernel.org";
    .host = "149.20.20.135";
    .port = "80";
    // Health probe: HEAD request for /index.html, 0.3 s timeout,
    // judged over a window of 8 probes with a threshold of 3.
    .probe = {
        .request =
            "HEAD /index.html HTTP/1.1"
            "Host: mirrors.kernel.org"
            "Connection: close";
        .timeout = 0.3 s;
        .window = 8;
        .threshold = 3;
    }
}

// Second mirrors.kernel.org address, same probe settings as kernelorg_1.
backend kernelorg_2 {
    #.host = "mirrors.kernel.org";
    .host = "204.152.191.39";
    .port = "80";
    .probe = {
        .request =
            "HEAD /index.html HTTP/1.1"
            "Host: mirrors.kernel.org"
            "Connection: close";
        .timeout = 0.3 s;
        .window = 8;
        .threshold = 3;
    }
}

// A director acts as a router for backends.
// Both kernel.org backends carry equal weight; the commented entries are
// alternative backends that are not defined in this file.
director ubuntu random {
    # { .backend = ubuntu_us_1; .weight = 1; }
    # { .backend = ubuntu_us_2; .weight = 1; }
    # { .backend = ubuntu_us_3; .weight = 1; }
    # { .backend = ubuntu_us_4; .weight = 1; }
    # { .backend = ubuntu_osuosl_1; .weight = 50; }
    # { .backend = ubuntu_osuosl_2; .weight = 50; }
    { .backend = kernelorg_2; .weight = 1; }
    { .backend = kernelorg_1; .weight = 1; }
}

// Request entry point: choose the backend, bypass the cache for repository
// metadata, and set how long stale objects may be served.
sub vcl_recv {
    // BACKEND declarations have to happen first so that dead hosts are
    // properly excluded.
    // In this case, all these base directories are handled by this single
    // backend. It could also look like
    //     req.http.host ~ "linux.etherboot.org"
    // for virtual host based setups.
    if (req.url ~ "^/(ubuntu|debian|centos|opensuse|suse|fedora|gentoo)") {
        // The backends are addressed by IP, so the upstream virtual host
        // name has to be set explicitly.
        set req.http.host = "mirrors.kernel.org";
        set req.backend = ubuntu;
    }

    // Skip repository files (apt).
    // The suffix is an optional group rather than empty alternation
    // branches, which POSIX extended regular expressions leave undefined.
    if (req.url ~ "/(Release|Packages|Sources)(\.gz|\.bz2|\.gpg)?$") {
        pass;
    }

    // Skip repository files (yum)
    if (req.url ~ "/repodata/.*") {
        pass;
    }

    // Basic housekeeping for dead backends:
    // continue serving stale files for up to an hour while the backend
    // recovers; otherwise accept objects up to 30s past their TTL.
    if (req.backend.healthy) {
        set req.grace = 30s;
    } else {
        set req.grace = 1h;
    }
}

// Runs after a response has been fetched from the backend; decides how long
// the object lives in the cache.
sub vcl_fetch {
    // Debugging info
    set obj.http.X-Varnish-Url = req.url;

    // Keep cachable objects up to 1 day. Only successful responses get the
    // long lifetime: otherwise a 404 for a package that has not reached the
    // upstream mirror yet would be served from cache for a whole day.
    if (obj.status == 200) {
        set obj.ttl = 1d;
    }

    // Serve the stale file for up to 15m while a newer version is fetched
    set obj.grace = 15m;
    #set beresp.grace = 15m; // not used yet
    set obj.prefetch = -30s;
}

// Cache miss: refuse to fetch on behalf of crawlers so they cannot fill the
// cache with objects nobody is installing.
sub vcl_miss {
    // Nasty spiders!! DIE!!
    if (req.http.user-agent ~ "spider") {
        error 503 "Not presently in cache, please try again later";
    }
}

// Final step before the response is sent to the client.
sub vcl_deliver {
    // Debugging headers: report whether the object came from cache.
    if (obj.hits > 0) {
        set resp.http.X-Cache = "HIT";
    } else {
        set resp.http.X-Cache = "MISS";
    }
}