gitolite/src/gl-compile-conf

#!/usr/bin/perl

use strict;
use warnings;
use Data::Dumper;
$Data::Dumper::Indent = 1;
$Data::Dumper::Sortkeys = 1;

# ----------------------------------------------------------------------------
#       find the rc file, then pull the libraries
# ----------------------------------------------------------------------------

# both variables must be known at compile time: GL_BINDIR is handed to
# "use lib" right below, so the checks have to live in a BEGIN block
BEGIN {
    $ENV{GL_RC}     or die "ENV GL_RC not set\n";
    $ENV{GL_BINDIR} or die "ENV GL_BINDIR not set\n";
}

use lib $ENV{GL_BINDIR};
use gitolite_rc;
use gitolite qw(:DEFAULT %repos %groups %git_configs %split_conf);

# === add-auth-keys ===

# quiet mode, if asked for with a leading '-q'; please do not use this when
# running manually.  Note that the first argument is consumed either way.
if (@ARGV and shift(@ARGV) eq '-q') {
    open STDOUT, ">", "/dev/null";
}

# ----------------------------------------------------------------------------
#       definitions specific to this program
# ----------------------------------------------------------------------------

# groups can now represent user groups or repo groups.

# $groups{group}{member} = "master" (or name of fragment file in which the
# group is defined).
# our %groups = ();     # moved to gitolite.pm now

# %repos has two functions.

# $repos{repo}{R|W}{user} = 1 if user has R (or W) permissions for at least
# one branch in repo.  This is used by the "level 1 check" (see faq).  There's
# also the new "C" (create a repo) permission now

# $repos{repo}{user} is a list of {ref, perms} pairs.  This is used by the
# level 2 check.  In order to allow "exclude" rules, the order of rules now
# matters, so what used to be entirely "hash of hash of hash" now has a list
# in between :)
# copy above desc to lite.pm -- my %repos = ();

# names of repos whose ACLs don't make it into the main compiled config file
# copy above desc to lite.pm -- my %split_conf = ();

# rule sequence number; parse_conf_line stores the current value as the first
# element of every rule it records and then increments it, so rules can be
# put back in config-file order later
my $rule_seq = 0;

# <sigh>...  having been forced to use a list as described above, we lose some
# efficiency due to the possibility of the same {ref, perms} pair showing up
# multiple times for the same repo+user.  So...
# $rurp_seen{repo}{user}{ref}{perms} counts how often that combination has
# been seen; only the first occurrence is pushed onto the rule list
my %rurp_seen = ();

# catch usernames<->pubkeys mismatches; search for "lint" below
# keyed by user name, counting mentions in permission lines; handed to
# setup_authkeys at the very end
my %user_list = ();

# repo specific 'git config' stuff
# our %git_configs = ();    # moved to gitolite.pm now

# gitweb descriptions and owners; plain text, keyed by "$repo.git"
# filled in by the description lines in parse_conf_line, read back when
# setup_gitweb_access is called for each physical repo
my %desc = ();
my %owner = ();

# ----------------------------------------------------------------------------
#       subroutines
# ----------------------------------------------------------------------------

# Expand group names in a list, one level deep.
#
# Takes a list of names; returns a new list in which every "@group" item has
# been replaced by the sorted member names of that group (as recorded in
# %groups).  Plain names, and the special name '@all', are passed through
# unchanged.  Dies if a group has not been defined.
sub expand_list
{
    my @expanded;

    for my $name (@_)
    {
        # anything that is not a group reference is kept as it is
        if ($name !~ /^@/ or $name eq '@all')
        {
            push @expanded, $name;
            next;
        }

        # nested group: it must already exist...
        $groups{$name} or die "$ABRT undefined group $name\n";
        # ...and its members take its place
        push @expanded, sort keys %{ $groups{$name} };
    }

    return @expanded;
}

# ----------------------------------------------------------------------------
#       "compile" GL conf
# ----------------------------------------------------------------------------

# Decide whether the config being parsed may set access (or a description)
# for a repo.
#
#   $fragment - 'master' for the main config, otherwise the fragment's name
#   $repo     - the repo name (or pattern, or "@group" in big-config mode)
#
# Returns 0 if the access is allowed, 1 if it must be refused.
sub check_fragment_repo_disallowed
{
    # trying to set access for $repo (='foo')...
    my ($fragment, $repo) = @_;

    # processing the master config, not a fragment
    return 0 if $fragment eq 'master';
    # fragment is also called 'foo' (you're allowed to have a
    # fragment that is only concerned with one repo)
    return 0 if $fragment eq $repo;
    # same thing in big-config-land; foo is just @foo now
    return 0 if $GL_BIG_CONFIG and ("\@$fragment" eq $repo);

    # Fetch the fragment's group *without* dereferencing a missing entry:
    # "keys %{ $groups{...} }" on a non-existent key would autovivify an
    # empty group, and a later "@fragment" reference in this fragment would
    # then slip past the "undefined group" check in expand_list.
    my $members = $groups{"\@$fragment"};
    return 1 unless $members;

    # only members that were put in the group by the master config count;
    # each member is used as an anchored pattern so wildcard members work
    my @matched = grep { $repo =~ /^$_$/ }
                  grep { $members->{$_} eq 'master' }
                  sort keys %{ $members };
    return 0 if @matched > 0;
    return 1;
}

# Parse one normalised config line and update the global tables.
#
#   $line      - a line that has already been through cleanup_conf_line
#   $fragment  - 'master', or the name of the fragment being parsed
#   $repos_p   - ref to the list of repos named by the most recent "repo"
#                line; a "repo" line replaces its contents, permission and
#                config lines read it
#   $ignored_p - ref to a hash; $ignored_p->{fragment}{repo} is set whenever
#                a fragment tries to set access for a repo it may not touch
#
# The line types are tried in this order: group definition, "repo",
# permission rule, "config", "include", gitweb description.  A line that
# matches none of them is fatal.
sub parse_conf_line
{
    my ($line, $fragment, $repos_p, $ignored_p) = @_;

    # user or repo groups
    if ($line =~ /^(@\S+) = ?(.*)/)
    {
        die "$ABRT defining groups is not allowed inside fragments\n"
            if $GL_BIG_CONFIG and $fragment ne 'master';
        # store the members of each group as hash key.  Keep track of when
        # the group was *first* created by using $fragment as the *value*
        # (the ||= leaves an existing value alone)
        do { $groups{$1}{$_} ||= $fragment } for ( expand_list( split(' ', $2) ) );
        # create the group hash even if empty
        $groups{$1} = {} unless $groups{$1};
        # the name is validated only after the group has been stored; that is
        # harmless because a bad name aborts the whole run
        die "$ABRT bad group $1\n" unless $1 =~ $REPONAME_PATT;
    }
    # repo(s)
    elsif ($line =~ /^repo (.*)/)
    {
        # grab the list...
        @{ $repos_p } = split ' ', $1;
        # ...expand groups in the default case
        @{ $repos_p } = expand_list ( @{ $repos_p } ) unless $GL_BIG_CONFIG;
        # ...sanity check
        # ($_ aliases the list element, so the s/// below strips the
        # extension from the stored name as well)
        for (@{ $repos_p }) {
            print STDERR "$WARN explicit '.git' extension ignored for $_.git\n" if s/\.git$//;
            die "$ABRT bad reponame $_\n"
                if ($GL_WILDREPOS and $_ !~ $REPOPATT_PATT);
            die "$ABRT bad reponame $_ or you forgot to set \$GL_WILDREPOS\n"
                if (not $GL_WILDREPOS and $_ !~ $REPONAME_PATT);
        }
        # the words CREATER/CREATOR become the literal text '$creator'
        s/\bCREAT[EO]R\b/\$creator/g for @{ $repos_p };
    }
    # actual permission line
    elsif ($line =~ /^(-|C|R|RW\+?(?:C?D?|D?C?)) (.* )?= (.+)/)
    {
        # $1 = the permission, $2 = optional list of refs, $3 = list of users
        my $perms = $1;
        my @refs; @refs = split( ' ', $2 ) if $2;
        @refs = expand_list ( @refs );
        my @users = split ' ', $3;
        die "$ABRT \$GL_WILDREPOS is not set, you cant use 'C' in config\n" if $perms eq 'C' and not $GL_WILDREPOS;

        # if no ref is given, this PERM applies to all refs
        @refs = qw(refs/.*) unless @refs;
        # deprecation warning (the s/// also rewrites the ref in place)
        map { print STDERR "WARNING: old syntax 'PATH/' found; please use new syntax 'NAME/'\n" if s(^PATH/)(NAME/) } @refs;
        # fully qualify refs that don't start with "refs/" or "NAME/";
        # prefix them with "refs/heads/"
        @refs = map { m(^(refs|NAME)/) or s(^)(refs/heads/); $_ } @refs;
        # a "/USER/" path component becomes the literal text '/$gl_user/'
        @refs = map { s(/USER/)(/\$gl_user/); $_ } @refs;

        # expand the user list, unless it is just "@all"
        @users = expand_list ( @users ) unless $GL_BIG_CONFIG;
        do  { die "$ABRT bad username $_\n" unless $_ =~ $USERNAME_PATT } for @users;

        # in the user list, CREATER/CREATOR becomes the literal text
        # '~$creator' (note the tilde, unlike in repo names)
        s/\bCREAT[EO]R\b/~\$creator/g for @users;

        # ok, we can finally populate the %repos hash
        for my $repo (@{ $repos_p })       # each repo in the current stanza
        {
            # if we're processing a delegated config file (not the master
            # config), we need to prevent attempts by that admin to obtain
            # rights on stuff outside his domain

            # trying to set access for $repo (='foo')...
            if (check_fragment_repo_disallowed( $fragment, $repo ))
            {
                # remember it so parse_conf_file can warn once per fragment
                $ignored_p->{$fragment}{$repo} = 1;
                next;
            }
            for my $user (@users)
            {
                # lint check, to catch pubkey/username typos
                if ($user =~ /^@/ and $user ne '@all') {
                    # this is a usergroup, not a normal user; happens with GL_BIG_CONFIG
                    if (exists $groups{$user}) {
                        $user_list{$_}++ for keys %{ $groups{$user} };
                    }
                } else {
                    $user_list{$user}++;
                }

                # for 1st level check (see faq/tips doc)
                # a bare 'C' rule is recorded here only; the "next" skips the
                # rest of the loop body for this user
                $repos{$repo}{C}{$user} = 1, next if $perms eq 'C';
                $repos{$repo}{R}{$user} = 1 if $perms =~ /R/;
                $repos{$repo}{W}{$user} = 1 if $perms =~ /W|D/;

                # if the user specified even a single 'D' anywhere, make
                # that fact easy to find; this changes the meaning of RW+
                # to no longer permit deletes (see update hook)
                $repos{$repo}{DELETE_IS_D} = 1 if $perms =~ /D/;
                $repos{$repo}{CREATE_IS_C} = 1 if $perms =~ /RW.*C/;

                # for 2nd level check, store each "ref, perms" pair in order
                for my $ref (@refs)
                {
                    # checking NAME based restrictions is expensive for
                    # the update hook (see the changes to src/hooks/update
                    # in this commit for why) so we would *very* much like
                    # to avoid doing it for the large majority of repos
                    # that do *not* use NAME limits.  Setting a flag that
                    # can be checked right away will help us do that
                    $repos{$repo}{NAME_LIMITS} = 1 if $ref =~ /^NAME\//;
                    # a user name ending in "creator" is stored under a key
                    # ending in "creator - wild" instead
                    my $p_user = $user; $p_user =~ s/creator$/creator - wild/;
                    # each rule is [ sequence number, ref, perms ]; the
                    # %rurp_seen counter suppresses exact duplicates
                    push @{ $repos{$repo}{$p_user} }, [ $rule_seq++, $ref, $perms ]
                        unless $rurp_seen{$repo}{$p_user}{$ref}{$perms}++;
                }
            }
        }
    }
    # repo specific 'git config' stuff
    elsif ($line =~ /^config (.+) = ?(.*)/)
    {
        my ($key, $value) = ($1, $2);
        # each whitespace-separated word of $GL_GITCONFIG_KEYS is used as an
        # anchored pattern; the key must match at least one of them
        my @validkeys = split(' ', ($GL_GITCONFIG_KEYS || '') );
        my @matched = grep { $key =~ /^$_$/ } @validkeys;
        die "$ABRT git config $key not allowed\ncheck GL_GITCONFIG_KEYS in the rc file for how to allow it\n" if (@matched < 1);
        for my $repo (@{ $repos_p })       # each repo in the current stanza
        {
            $git_configs{$repo}{$key} = $value;
            # no problem if it's a plain repo (non-pattern, non-groupname)
            # OR wild configs are allowed
            unless ( ($repo =~ $REPONAME_PATT and $repo !~ /^@/) or $GL_GITCONFIG_WILD) {
                my @r = ($repo);                                        # single wildpatt
                @r = sort keys %{ $groups{$repo} } if $groups{$repo};   # or a group; get its members
                do {
                    warn "$WARN git config set for $_ but \$GL_GITCONFIG_WILD not set\n" unless $_ =~ $REPONAME_PATT
                } for @r;
            }
        }
    }
    # include
    elsif ($line =~ /^include "(.+)"/)
    {
        my $file = $1;
        # relative paths are taken to be inside $GL_ADMINDIR/conf
        $file = "$GL_ADMINDIR/conf/$file" unless $file =~ /^\//;
        die "$ABRT $fragment attempting to include configuration\n" if $fragment ne 'master';
        die "$ABRT included file not found: '$file'\n" unless -f $file;

        # recurse; the included file is parsed with the same fragment name
        parse_conf_file( $file, $fragment );
    }
    # very simple syntax for the gitweb description of repo; one of:
    #       reponame = "some description string"
    #       reponame "owner name" = "some description string"
    elsif ($line =~ /^(\S+)(?: "(.*?)")? = "(.*)"$/)
    {
        my ($repo, $owner, $desc) = ($1, $2, $3);
        die "$ABRT bad repo name $repo\n" unless $repo =~ $REPONAME_PATT;
        die "$ABRT $fragment attempting to set description for $repo\n" if check_fragment_repo_disallowed( $fragment, $repo );
        $desc{"$repo.git"} = $desc;
        $owner{"$repo.git"} = $owner || '';
    }
    else
    {
        die "$ABRT can't make head or tail of '$line'\n";
    }
}

# Normalise one raw config line so the parser's regexes can stay simple.
#
# Takes the raw line; returns it with any trailing comment removed, the first
# "=" surrounded by spaces, every run of whitespace collapsed to one space,
# and no leading or trailing space.  A line with nothing left comes back as
# the empty string.
sub cleanup_conf_line
{
    my ($line) = @_;

    # topicalise, so that each step below works on $line in place
    for ($line)
    {
        # kill comments, but take care of "#" inside *simple* strings
        s/^((".*?"|[^#"])*)#.*/$1/;
        # make sure the first "=" stands on its own...
        s/=/ = /;
        # ...then squeeze all whitespace (including the newline)...
        s/\s+/ /g;
        # ...and trim both ends
        s/^ //;
        s/ $//;
    }

    return $line;
}

# Parse one config file (the main config, a fragment, or an included file).
#
#   $conffile - path of the file to read
#   $fragment - "master" when parsing the main config, and the fragment name
#               when parsing a fragment.  In the latter case the parser uses
#               it to ignore (and warn about) any repos in the fragment that
#               are not members of the "repo group" of the same name.
#
# Every non-blank line is cleaned up and handed to parse_conf_line.  After
# the whole file has been read, one warning is printed per fragment that
# tried to set access for repos outside its domain.
sub parse_conf_file
{
    my ($conffile, $fragment) = @_;
    my %ignored = ();

    my $conf_fh = wrap_open( "<", $conffile );

    # the syntax is fairly simple, so we parse it inline

    # repos named by the most recent "repo" line; shared with parse_conf_line
    my @repos;

    # Read into a lexical rather than the global $_: this function calls
    # itself (via "include" lines), and "while (<$fh>)" does not localise $_,
    # so it would clobber whatever a caller had there.
    while (my $raw = <$conf_fh>)
    {
        my $line = cleanup_conf_line($raw);
        # skip blank lines
        next unless $line =~ /\S/;

        parse_conf_line( $line, $fragment, \@repos, \%ignored );
    }
    close $conf_fh;

    for my $ig (sort keys %ignored)
    {
        warn "\n\t\t***** WARNING *****\n" .
             "\t$ig.conf attempting to set access for " .
             join (", ", sort keys %{ $ignored{$ig} }) . "\n";
    }
}

# the main config goes first; whatever it defines is visible to every fragment
parse_conf_file($GL_CONF, 'master');

# then the delegated fragments, one file at a time
wrap_chdir($GL_ADMINDIR);
for my $fragment_file (glob("conf/fragments/*.conf"))
{
    # we already check (elsewhere) that a fragment called "foo" will not try
    # to specify access control for a repo whose name is not "foo" or is not
    # part of a group called "foo" created by master

    # there is, however, a possible attack: the admin for group B creates a
    # "convenience" group of (a subset of) his users, and the admin for repo
    # group A (alphabetically before B) adds himself to that same group in
    # his own fragment.  As a result, admin_A now has access to group B
    # repos :(

    # so the groups hash is locked to the value it had after parsing
    # "master"; changes made by this fragment are localised to this loop
    # iteration and do not propagate to the next fragment.  Thus, each
    # fragment sees only the groups defined in "master" and in itself
    local %groups = %groups;

    # the fragment's name is the file name without directory and extension
    (my $fragment = $fragment_file) =~ s/^conf\/fragments\/(.*).conf$/$1/;
    parse_conf_file($fragment_file, $fragment);
}

# ----------------------------------------------------------------------------
#       (that ends the config file compiler, though we postpone the writing
#       for now to deal with the latest GL_BIG_CONFIG innovation!)
# ----------------------------------------------------------------------------

# ----------------------------------------------------------------------------
#       what's the git version?
# ----------------------------------------------------------------------------

# we don't like stuff older than 1.6.2

# ask git for its version; an empty answer means git could not be run
my $git_version = `git --version`;
die "
    *** ERROR ***
    did not get a proper version number.  Please see if git is in the PATH on
    the server.  If it is not, please edit ~/.gitolite.rc on the server and
    set the \$GIT_PATH variable to the correct value\n
" unless $git_version;
my ($gv_maj, $gv_min, $gv_patchrel) = ($git_version =~ m/git version (\d+)\.(\d+)\.(\d+)/);
# if the output did not look like "git version X.Y.Z" all three are undef;
# check for that explicitly so we abort cleanly instead of first tripping an
# "uninitialized value" warning in the numeric comparison
die "$ABRT I can't understand $git_version\n" unless (defined $gv_maj and $gv_maj >= 1);
$git_version = $gv_maj*10000 + $gv_min*100 + $gv_patchrel;  # now it's "normalised"

die "\n\t\t***** AAARGH! *****\n" .
    "\tyour git version is older than 1.6.2\n" .
    "\tsince that is now more than one year old, and gitolite needs some of\n" .
    "\tthe newer features, please upgrade.\n"
    if $git_version < 10602;     # that's 1.6.2 to you

# ----------------------------------------------------------------------------
# most of the rest of this program can be "switched off"; see
# doc/big-config.mkd for details.
# ----------------------------------------------------------------------------

# all the directory hopping below needs an absolute repo base; a relative
# $REPO_BASE is taken to be relative to $HOME
if ($REPO_BASE =~ m(^/)) {
    $ENV{GL_REPO_BASE_ABS} = $REPO_BASE;
}
else {
    $ENV{GL_REPO_BASE_ABS} = "$ENV{HOME}/$REPO_BASE";
}

# process the normal repos in %repos.  This includes creating them if needed
# (and GL_NO_CREATE_REPOS is not set), checking hooks, and finally, if
# GL_BIG_CONFIG is set, writing out the one-repo config file for directly
# specified repos (i.e., "repo foo", not "@grp = foo" + "repo @grp")
do_normal_repos();
# whatever is still left in %repos goes into the final compiled config
write_compiled_conf();

# ----------------------------------------------------------------------------
#       process the normal repos in %repos (create, hook, one_repo config...)
# ----------------------------------------------------------------------------

# Walk over every "normal" repo mentioned in %repos, directly or as a member
# of a repo group, and bring it up to date on disk: create it if it does not
# exist, (re)install the hooks if the sentinel hook is missing, and, in
# big-config mode, write its one-repo compiled config.
#
# Takes no arguments.  Works inside $ENV{GL_REPO_BASE_ABS}.
sub do_normal_repos
{
    wrap_chdir("$ENV{GL_REPO_BASE_ABS}");

    # collect candidate names in a hash so duplicates vanish by themselves
    # (the code in the loop below is disk activity!)
    my %candidate;
    for my $name (keys %repos) {
        if ($name =~ /^@/) {
            # a repo group: its members are the candidates
            $candidate{$_} = 1 for keys %{ $groups{$name} };
        }
        elsif ($name =~ $REPONAME_PATT) {
            # a normal repo named directly
            $candidate{$name} = 1;
        }
    }

    for my $repo (sort keys %candidate) {
        next unless $repo =~ $REPONAME_PATT;    # skip repo patterns
        next if $repo =~ m(^\@|EXTCMD/);        # skip groups and fake repos

        if (not $GL_NO_CREATE_REPOS) {
            if (not -d "$repo.git") {
                print STDERR "creating $repo...\n";
                new_repo($repo, "$GL_ADMINDIR/hooks/common");
                # new_repo would have chdir'd us away; come back
                wrap_chdir("$ENV{GL_REPO_BASE_ABS}");
            }

            # repos copied over from elsewhere used to need another run of
            # easy install to get the proper update hook.  The (empty)
            # "gitolite-hooked" hook acts as a sentinel: if it is not there
            # as a symlink, the hooks are linked in afresh
            if (not -l "$repo.git/hooks/gitolite-hooked") {
                ln_sf("$GL_ADMINDIR/hooks/common", "*", "$repo.git/hooks");
                # in case of package install, GL_ADMINDIR is no longer the top cop;
                # override with the package hooks
                ln_sf("$GL_PACKAGE_HOOKS/common", "*", "$repo.git/hooks") if $GL_PACKAGE_HOOKS;
            }
        }

        # write a one_repo config for normal repos declared directly (not just via a group)
        if ($GL_BIG_CONFIG and $repos{$repo} and -d "$repo.git") {
            write_1_compiled_conf($repo);
        }
    }
}

# Write the compiled rules (and git config settings, if any) of one repo to
# "$repo.git/gl-conf", relative to the current directory.
#
#   $repo - name of the repo
#
# warning: writes and *deletes* it from %repos and %git_configs, and records
# the repo in %split_conf.
#
# If the file cannot be opened the function returns quietly without touching
# any of the hashes, so the repo stays in the main compiled config.  A failure
# while closing the file is fatal: by then the repo has been removed from
# %repos, and a truncated gl-conf would mean its rules are lost.
sub write_1_compiled_conf
{
    my ($repo) = shift;
    my (%one_repo, %one_git_config);

    open(my $compiled_fh, ">", "$repo.git/gl-conf") or return;

    $one_repo{$repo} = $repos{$repo};
    delete $repos{$repo};
    my $dumped_data = Data::Dumper->Dump([\%one_repo], [qw(*one_repo)]);

    if ($git_configs{$repo}) {
        $one_git_config{$repo} = $git_configs{$repo};
        delete $git_configs{$repo};
        $dumped_data .= Data::Dumper->Dump([\%one_git_config], [qw(*one_git_config)]);
    }

    # the dump uses single quotes, but we convert any strings containing $creator
    # and $gl_user to double quoted strings.  A bit sneaky, but not too much...
    $dumped_data =~ s/'(?=[^']*\$(?:creator|gl_user))~?(.*?)'/"$1"/g;
    print $compiled_fh $dumped_data;
    # buffered write errors only show up here, so the close must be checked
    close $compiled_fh or die "$ABRT close $repo.git/gl-conf failed: $!\n";

    $split_conf{$repo} = 1;
}

# Write the main compiled config.
#
# The data version, %repos, %git_configs (if not empty), %groups (if not
# empty) and %split_conf (if not empty) are dumped to "$GL_CONF_COMPILED.new",
# which is then renamed over $GL_CONF_COMPILED.  Takes no arguments.  Dies if
# the new file cannot be closed or cannot be renamed into place.
sub write_compiled_conf
{
    my $compiled_fh = wrap_open( ">", "$GL_CONF_COMPILED.new" );
    my $data_version = $current_data_version;
    print $compiled_fh Data::Dumper->Dump([$data_version], [qw(*data_version)]);
    my $dumped_data = Data::Dumper->Dump([\%repos], [qw(*repos)]);
    $dumped_data .= Data::Dumper->Dump([\%git_configs], [qw(*git_configs)]) if %git_configs;
    # the dump uses single quotes, but we convert any strings containing $creator
    # and $gl_user to double quoted strings.  A bit sneaky, but not too much...
    $dumped_data =~ s/'(?=[^']*\$(?:creator|gl_user))~?(.*?)'/"$1"/g;
    print $compiled_fh $dumped_data;
    if (%groups) {
        $dumped_data = Data::Dumper->Dump([\%groups], [qw(*groups)]);
        # group members are stored as written in the config, so the
        # CREATER/CREATOR keyword is translated only now
        $dumped_data =~ s/\bCREAT[EO]R\b/\$creator/g;
        $dumped_data =~ s/'(?=[^']*\$(?:creator|gl_user))~?(.*?)'/"$1"/g;
        print $compiled_fh $dumped_data;
    }
    print $compiled_fh Data::Dumper->Dump([\%split_conf], [qw(*split_conf)]) if %split_conf;
    close $compiled_fh or die "$ABRT close compiled-conf failed: $!\n";
    # if the rename fails the old compiled config would silently stay in
    # force, so that has to be fatal as well
    rename "$GL_CONF_COMPILED.new", "$GL_CONF_COMPILED"
        or die "$ABRT rename $GL_CONF_COMPILED.new to $GL_CONF_COMPILED failed: $!\n";
}

# ----------------------------------------------------------------------------
#       get a list of physical repos for later
# ----------------------------------------------------------------------------

# stays empty when $GL_NO_DAEMON_NO_GITWEB is set
my @phy_repos = $GL_NO_DAEMON_NO_GITWEB ? () : list_phy_repos();

# NOTE: we're overloading GL_NO_DAEMON_NO_GITWEB to mean "no git config" also.
# In fact anything that requires trawling through the existing repos doing
# stuff to all of them is skipped if this variable is set.  This is primarily
# for the Fedora folks, but it should be useful for anyone who has a huge set
# of repos and wants to manage gitweb/daemon/etc access via other means (they
# typically have the whole thing controlled by a web-app and a database
# anyway, and gitolite is only doing the access control and nothing more).

# ----------------------------------------------------------------------------
#       various updates to all real repos
# ----------------------------------------------------------------------------

# ----------------------------------------------------------------------------
#       update repo configurations, gitweb description, daemon export-ok, etc
# ----------------------------------------------------------------------------

# repos that gitweb may show, keyed by "$repo.git"
my %projlist = ();

# everything below runs once per real repo (and remember the list is empty,
# so all of it is skipped, if $GL_NO_DAEMON_NO_GITWEB is on!)

# note: there are 2 separate loops on purpose, to avoid breaking the
# optimisation in sub parse_acl (look for variable $saved_crwu)

# loop 1: daemon access, which is easy
for my $phy (@phy_repos) {
    wrap_chdir("$ENV{GL_REPO_BASE_ABS}/$phy.git");
    setup_daemon_access($phy);
}

# loop 2: gitweb access, then repo specific git config
for my $phy (@phy_repos) {
    wrap_chdir("$ENV{GL_REPO_BASE_ABS}/$phy.git");

    # gitweb is a little more complicated.  Here're some notes:
    # - "setup_gitweb_access" also sets "owner", despite the name
    # - specifying a description also counts as enabling gitweb
    # - description and owner are not specified for wildrepos; they're
    #   specified for *actual* repos, even if the repo was created by a
    #   wild card spec and "C" permissions.  If you see the docs for the
    #   gitolite.conf file, you will see that repo owner/desc don't go
    #   into the "repo foo" section; they're essentially independent.
    #   Anyway, it doesn't seem to make sense for all wild repos (for
    #   some pattern) to have the same description and owner.
    my $key         = "$phy.git";
    my $description = $desc{$key}  || '';
    my $owner_name  = $owner{$key} || '';
    if (setup_gitweb_access($phy, $description, $owner_name)) {
        $projlist{$key} = 1;
    }

    # git config
    # implementation note: this must happen *after* one of the previous 2
    # calls (setup daemon or gitweb).  The reason is that they call
    # "can_read", which eventually calls parse_acl with the right "creator"
    # set for the *current* repo, which in turn stores translated values for
    # $creator in the git_configs hash, which, (phew!) is needed for a match
    # that eventually gets you a valid $git_configs{} below
    if ($git_configs{$phy}) {
        setup_git_configs($phy, \%git_configs);
    }
}

# write out the project list, one "$repo.git" per line, in sorted order
my $projlist_fh = wrap_open( ">", $PROJECTS_LIST);
for my $proj (sort keys %projlist) {
    print $projlist_fh "$proj\n";
}
# buffered write errors only surface at close time, so check it
close $projlist_fh or die "$ABRT close $PROJECTS_LIST failed: $!\n";

# ----------------------------------------------------------------------------
#       "compile" ssh authorized_keys
# ----------------------------------------------------------------------------

# %user_list lets setup_authkeys cross-check the users seen in the config
setup_authkeys($GL_KEYDIR, \%user_list) unless $GL_NO_SETUP_AUTHKEYS;