Table of Contents

Introduction

By using a reverse proxy, it is possible to create an HTTP mirror without the heavy burden of rsync and without allocating a large amount of disk space up front. The reverse proxy caches individual files as they are requested, with special cases that exclude repository metadata files so those are always fetched fresh.

Some practical applications might include:

Note: The advantages of this proxy won't be seen with only one or two hosts, except where those hosts are being reinstalled constantly.

Requirements

Varnish Config

Create /etc/varnish/public-mirror.vcl and edit the startup settings for Varnish so that it loads this file (on Red Hat the settings are in /etc/sysconfig/varnish, on Gentoo in /etc/conf.d/varnishd, …). This particular configuration uses http://mirrors.kernel.org (from the US) because it is very reliable and is usually a Tier 1 or Tier 2 mirror for most projects. The same approach works with another source, such as http://jailtime.org and its many mirrors.

This configuration has been tested and is currently in use with minor OS tweaks for performance.

# First mirrors.kernel.org origin, addressed directly by IP.
# NOTE(review): the hostname form is kept (commented out) for reference;
# presumably a literal address is used because the name resolves to more
# than one host -- confirm.
backend kernelorg_1 {
        .port = "80";
        .host = "149.20.20.135";
        #.host = "mirrors.kernel.org";

        # Health check: a HEAD request for /index.html sent with the
        # mirror's Host header, since the backend is reached by IP.
        .probe = {
                .threshold = 3;
                .window = 8;
                .timeout = 0.3 s;
                .request =
                    "HEAD /index.html HTTP/1.1"
                    "Host: mirrors.kernel.org"
                    "Connection: close";
        }
}
 
# Second mirrors.kernel.org origin, addressed directly by IP.
# NOTE(review): the hostname form is kept (commented out) for reference;
# presumably a literal address is used because the name resolves to more
# than one host -- confirm.
backend kernelorg_2 {
        .port = "80";
        .host = "204.152.191.39";
        #.host = "mirrors.kernel.org";

        # Health check: a HEAD request for /index.html sent with the
        # mirror's Host header, since the backend is reached by IP.
        .probe = {
                .threshold = 3;
                .window = 8;
                .timeout = 0.3 s;
                .request =
                    "HEAD /index.html HTTP/1.1"
                    "Host: mirrors.kernel.org"
                    "Connection: close";
        }
}
 
# The director groups several backends under one name; vcl_recv assigns it
# to req.backend.  With the "random" policy and equal weights, both
# kernel.org backends are given the same share of requests.
director ubuntu random {
        # Earlier backend candidates, currently disabled:
        # { .backend = ubuntu_us_1; .weight = 1; }
        # { .backend = ubuntu_us_2; .weight = 1; }
        # { .backend = ubuntu_us_3; .weight = 1; }
        # { .backend = ubuntu_us_4; .weight = 1; }
        # { .backend = ubuntu_osuosl_1; .weight = 50; }
        # { .backend = ubuntu_osuosl_2; .weight = 50; }
        {
                .backend = kernelorg_2;
                .weight = 1;
        }
        {
                .backend = kernelorg_1;
                .weight = 1;
        }
}
 
sub vcl_recv {

        # Backend selection must come first, so that the health test at the
        # bottom of this subroutine looks at the backend actually chosen.
        #
        # Every distribution tree listed here is served by the same
        # director.  For a virtual-host based setup the test could instead
        # be on the host, e.g. req.host ~ "linux.etherboot.org".
        if (req.url ~ "^/(ubuntu|debian|centos|opensuse|suse|fedora|gentoo)") {
                set req.backend = ubuntu;
                set req.http.host = "mirrors.kernel.org";
        }

        # Repository metadata is never served from cache:
        #   - apt: Release / Packages / Sources and their compressed or
        #     signed variants
        #   - yum: anything under a repodata/ directory
        if (req.url ~ "/(Release|Packages|Sources)(|\.gz|\.bz2|\.gpg|)$"
            || req.url ~ "/repodata/.*") {
                pass;
        }

        # Dead-backend housekeeping: accept much older stale objects (up to
        # an hour) while the backend is down, and only 30 seconds otherwise.
        if (!req.backend.healthy) {
                set req.grace = 1h;
        } else {
                set req.grace = 30s;
        }
}
 
sub vcl_fetch {
        // Debugging info: record the URL this object was fetched for
        set obj.http.X-Varnish-Url = req.url;

        // Keep cachable objects up to 1day
        set obj.ttl = 1d;

        // Retain expired objects for as long as the longest grace period a
        // request may ask for.  vcl_recv sets req.grace = 1h when the
        // backend is unhealthy, but a stale object can only be served while
        // it still exists, i.e. within obj.grace of its expiry.  The
        // previous value of 15m silently capped the "up to an hour" outage
        // window at 15 minutes.  Healthy-backend requests are unaffected:
        // they remain limited by req.grace = 30s.
        set obj.grace = 1h;
        #set beresp.grace = 1h;

        // not used yet
        set obj.prefetch = -30s;
}
 
sub vcl_miss {
        # Crawlers are refused on a cache miss: any client whose User-Agent
        # contains "spider" gets a 503 here, so it is only ever answered
        # with objects that are already cached.
        if (req.http.user-agent ~ "spider")
        {
                error 503 "Not presently in cache, please try again later";
        }
}
 
sub vcl_deliver {
        # Debugging header: report whether the response came from cache.
        # Assume a miss, then overwrite when the object has been hit before.
        set resp.http.X-Cache = "MISS";
        if (obj.hits > 0) {
                set resp.http.X-Cache = "HIT";
        }
}