(big-config) the new "big-config" for large setups

If you have many thousands of repos and users, neatly organised into
groups, etc., the normal gitolite fails.  (It actually runs out of
memory very fast while doing the "compile" when you push the config, due
to the number of combinations of repo/user being stored in the hash!)

This commit series will stop doing that if you set $GL_BIG_CONFIG = 1 in
the rc file.

Some notes:

  - deny rules will still work but somewhat differently -- now they must
    be placed all together in one place to work like before.  Ask me for
    details if you need to know before I get done with the docs

  - I've tested most of the important features, but not every single
    nuance

  - the update hook may be a tad less efficient now; we can try and
    tweak it later if needed but it shouldn't really hurt anything
    significantly even now

  - docs have not been written yet
This commit is contained in:
Sitaram Chamarty 2010-05-10 11:46:47 +05:30
parent 346b396840
commit cf0e568c89
5 changed files with 170 additions and 76 deletions

View file

@ -1,4 +1,5 @@
# paths and configuration variables for gitolite
$GL_BIG_CONFIG = 1;
# please read comments before editing

View file

@ -34,27 +34,12 @@ exit 0 if exists $ENV{GL_BYPASS_UPDATE_HOOK};
# we should already have the GL_RC env var set when we enter this hook
die "parse $ENV{GL_RC} failed: " . ($! or $@) unless do $ENV{GL_RC};
# then "do" the compiled config file, whose name we now know. Before doing
# that we setup the creator etc from environment variables so that the parse
# interpolates them. We've minimised the duplication but this *does*
# duplicate a bit of parse_acl from gitolite.pm; we don't want to include that
# file here just for that little bit
{
our $creator = $ENV{GL_CREATOR};
our $readers = $ENV{GL_READERS};
our $writers = $ENV{GL_WRITERS};
our $gl_user = $ENV{GL_USER};
die "parse $GL_CONF_COMPILED failed: " . ($! or $@) unless do $GL_CONF_COMPILED;
$repos{$ENV{GL_REPO}} = $repos{$ENV{GL_REPOPATT}} if ( $ENV{GL_REPOPATT} );
}
my $reported_repo = $ENV{GL_REPO} . ( $ENV{GL_REPOPATT} ? " ($ENV{GL_REPOPATT})" : "" );
# we've started to need some common subs in what used to be a small, cute,
# little script that barely spanned a few lines :(
require "$ENV{GL_BINDIR}/gitolite.pm";
my ($perm, $creator, $wild) = &repo_rights($ENV{GL_REPO});
my $reported_repo = $ENV{GL_REPO} . ( $ENV{GL_REPOPATT} ? " ($ENV{GL_REPOPATT})" : "" );
# ----------------------------------------------------------------------------
# start...
# ----------------------------------------------------------------------------
@ -71,17 +56,17 @@ chomp($merge_base = `git merge-base $oldsha $newsha`)
unless $oldsha eq '0' x 40
or $newsha eq '0' x 40;
# what are you trying to do? (is it 'W' or '+'?)
my $perm = 'W';
# att_acc == attempted access -- what are you trying to do? (is it 'W' or '+'?)
my $att_acc = 'W';
# rewriting a tag is considered a rewind, in terms of permissions
$perm = '+' if $ref =~ m(refs/tags/) and $oldsha ne ('0' x 40);
$att_acc = '+' if $ref =~ m(refs/tags/) and $oldsha ne ('0' x 40);
# non-ff push to ref
# notice that ref delete looks like a rewind, as it should
$perm = '+' if $oldsha ne $merge_base;
$att_acc = '+' if $oldsha ne $merge_base;
# were any 'D' perms specified? If they were, it means we have to separate
# deletes from rewinds, so if the new sha is all 0's, change the '+' to a 'D'
$perm = 'D' if ( $repos{$ENV{GL_REPO}}{DELETE_IS_D} or $repos{'@all'}{DELETE_IS_D} ) and $newsha eq '0' x 40;
$att_acc = 'D' if ( $repos{$ENV{GL_REPO}}{DELETE_IS_D} or $repos{'@all'}{DELETE_IS_D} ) and $newsha eq '0' x 40;
my @allowed_refs;
# @all repos: see comments in similar code in check_access
@ -113,12 +98,12 @@ if (exists $repos{$ENV{GL_REPO}}{NAME_LIMITS}) {
# log is the *first* one (which is a *real* ref, like refs/heads/master),
# while all the rest (if they exist) are like NAME/something. So we do the
# first one separately to capture it, then run the rest (if any)
my $log_refex = check_ref(\@allowed_refs, $ENV{GL_REPO}, (shift @refs), $perm);
&check_ref (\@allowed_refs, $ENV{GL_REPO}, $_ , $perm) for @refs;
my $log_refex = check_ref(\@allowed_refs, $ENV{GL_REPO}, (shift @refs), $att_acc);
&check_ref (\@allowed_refs, $ENV{GL_REPO}, $_ , $att_acc) for @refs;
# if we returned at all, all the checks succeeded, so we log the action and exit 0
&log_it("$ENV{GL_TS} $perm\t" .
&log_it("$ENV{GL_TS} $att_acc\t" .
substr($oldsha, 0, 14) . "\t" . substr($newsha, 0, 14) .
"\t$reported_repo\t$ref\t$ENV{GL_USER}\t$log_refex\n");

View file

@ -1,4 +1,7 @@
use strict;
use Data::Dumper;
$Data::Dumper::Deepcopy = 1;
# this file is commonly used using "require". It is not required to use "use"
# (because it doesn't live in a different package)
@ -34,8 +37,9 @@ our $USERNAME_PATT=qr(^\@?[0-9a-zA-Z][0-9a-zA-Z._\@+-]*$); # very simple patter
our $REPOPATT_PATT=qr(^\@?[0-9a-zA-Z][\\^.$|()[\]*+?{}0-9a-zA-Z._\@/-]*$);
# these come from the RC file
our ($REPO_UMASK, $GL_WILDREPOS, $GL_PACKAGE_CONF, $GL_PACKAGE_HOOKS, $REPO_BASE, $GL_CONF_COMPILED);
our ($REPO_UMASK, $GL_WILDREPOS, $GL_PACKAGE_CONF, $GL_PACKAGE_HOOKS, $REPO_BASE, $GL_CONF_COMPILED, $GL_BIG_CONFIG);
our %repos;
our %groups;
# ----------------------------------------------------------------------------
# convenience subs
@ -253,24 +257,39 @@ sub parse_acl
# want the config dumped as is, really
return unless $repo;
# return with "no wildcard match" status if you found the actual repo in
# the config or if wild is unset
return $ENV{GL_REPOPATT} = "" if $repos{$repo} or not $GL_WILDREPOS;
my ($wild, @repo_plus, @user_plus);
# expand $repo and $gl_user into all possible matching values
($wild, @repo_plus) = &get_memberships($repo, 1);
( @user_plus) = &get_memberships($gl_user, 0);
# XXX testing notes: the above should return just one entry during
# non-BC usage, whether wild or not
die "assert 1 failed" if (@repo_plus > 1 and $repo_plus[-1] ne '@all'
or @repo_plus > 2) and not $GL_BIG_CONFIG;
# didn't find actual repo in %repos, and wild is set, so find the repo
# pattern that matches the actual repo
my @matched = grep { $repo =~ /^$_$/ } sort keys %repos;
# the old "convenience copy" thing. Now on steroids :)
# didn't find a match? avoid leaking info to user about repo existence;
# as before, pretend "no wildcard match" status
return $ENV{GL_REPOPATT} = "" unless @matched;
# note that when copying the @all entry, we retain the destination name as
# @all; we dont change it to $repo or $gl_user
for my $r ('@all', @repo_plus) {
my $dr = $repo; $dr = '@all' if $r eq '@all';
$repos{$dr}{DELETE_IS_D} = 1 if $repos{$r}{DELETE_IS_D};
$repos{$dr}{NAME_LIMITS} = 1 if $repos{$r}{NAME_LIMITS};
die "$repo has multiple matches\n@matched\n" if @matched > 1;
for my $u ('@all', @user_plus) {
my $du = $gl_user; $du = '@all' if $u eq '@all';
$repos{$dr}{C}{$du} = 1 if $repos{$r}{C}{$u};
$repos{$dr}{R}{$du} = 1 if $repos{$r}{R}{$u};
$repos{$dr}{W}{$du} = 1 if $repos{$r}{W}{$u};
# found exactly one pattern that matched, copy its ACL for convenience
$repos{$repo} = $repos{$matched[0]};
# and return the pattern
return $ENV{GL_REPOPATT} = $matched[0];
next if $r eq $dr and $u eq $du; # no point duplicating those refexes
push @{ $repos{$dr}{$du} }, @{ $repos{$r}{$u} }
if exists $repos{$r}{$u} and ref($repos{$r}{$u}) eq 'ARRAY';
}
}
$ENV{GL_REPOPATT} = "";
$ENV{GL_REPOPATT} = $wild if $wild and $GL_WILDREPOS;
return ($wild);
}
# ----------------------------------------------------------------------------
@ -295,6 +314,11 @@ sub report_basic
&report_version($GL_ADMINDIR, $user);
print "\rthe gitolite config gives you the following access:\r\n";
for my $r (sort keys %repos) {
if ($r =~ $REPONAME_PATT) {
&parse_acl($GL_CONF_COMPILED, $r, "NOBODY", "NOBODY", "NOBODY");
} else {
&parse_acl($GL_CONF_COMPILED, $r, $ENV{GL_USER}, "NOBODY", "NOBODY");
}
# @all repos; meaning of read/write flags:
# @R => @all users are allowed access to this repo
# #R => you're a super user and can see @all repos
@ -331,7 +355,7 @@ sub expand_wild
# actual_repo has to match the pattern being expanded
next unless $actual_repo =~ /$repo/;
my($perm, $creator) = &repo_rights($actual_repo);
my($perm, $creator, $wild) = &repo_rights($actual_repo);
next unless $perm =~ /\S/;
print "$perm\t$creator\t$actual_repo\n";
}
@ -342,64 +366,67 @@ sub expand_wild
# how/why). Regardless of how we're called, we assume $ENV{GL_USER} is
# already defined
{
my %normal_repos;
my $last_repo = '';
sub repo_rights {
my $repo = shift;
$repo =~ s/^\.\///;
$repo =~ s/\.git$//;
return if $last_repo eq $repo; # a wee bit o' caching, though not yet needed
# we get passed an actual repo name. It may be a normal
# (non-wildcard) repo, in which case it is assumed to exist. If it's
# a wildrepo, it may or may not exist. If it doesn't exist, the "C"
# perms are also filled in, else that column is left blank
unless (%normal_repos) {
unless ($REPO_BASE) {
# means we've been called from outside
&where_is_rc();
die "parse $ENV{GL_RC} failed: " . ($! or $@) unless do $ENV{GL_RC};
}
&parse_acl($GL_CONF_COMPILED, "", "NOBODY", "NOBODY", "NOBODY");
%normal_repos = %repos;
unless ($REPO_BASE) {
# means we've been called from outside; see doc/admin-defined-commands.mkd
&where_is_rc();
die "parse $ENV{GL_RC} failed: " . ($! or $@) unless do $ENV{GL_RC};
}
my $creator;
my $perm = ' ';
my $creator;
# if repo is present "as is" in the config, those permissions will
# override anything inherited from a wildcard that may have matched
if ($normal_repos{$repo}) {
%repos = %normal_repos;
$creator = '<gitolite>';
} elsif ( -d "$ENV{GL_REPO_BASE_ABS}/$repo.git" ) {
# must be a wildrepo, and it has already been created; find the
# creator and subsitute in repos
# get basic info about the repo and fill %repos
my $wild = '';
my $exists = -d "$ENV{GL_REPO_BASE_ABS}/$repo.git";
if ($exists) {
# these will be empty if it's not a wildcard repo anyway
my ($read, $write);
($creator, $read, $write) = &wild_repo_rights($ENV{GL_REPO_BASE_ABS}, $repo, $ENV{GL_USER});
# get access list with these substitutions
&parse_acl($GL_CONF_COMPILED, $repo, $creator || "NOBODY", $read || "NOBODY", $write || "NOBODY");
$wild = &parse_acl($GL_CONF_COMPILED, $repo, $creator || "NOBODY", $read || "NOBODY", $write || "NOBODY");
} else {
$wild = &parse_acl($GL_CONF_COMPILED, $repo, $ENV{GL_USER}, "NOBODY", "NOBODY");
}
if ($exists and not $wild) {
$creator = '<gitolite>';
} elsif ($exists) {
# is a wildrepo, and it has already been created
$creator = "($creator)";
} else {
# repo didn't exist; C perms also need to be filled in after
# getting access list with only creator filled in
&parse_acl($GL_CONF_COMPILED, $repo, $ENV{GL_USER}, "NOBODY", "NOBODY");
# repo didn't exist; C perms need to be filled in
$perm = ( $repos{$repo}{C}{'@all'} ? ' @C' : ( $repos{$repo}{C}{$ENV{GL_USER}} ? ' =C' : ' ' )) if $GL_WILDREPOS;
# if you didn't have perms to create it, delete the "convenience"
# copy of the ACL that parse_acl makes
delete $repos{$repo} unless $perm =~ /C/;
$creator = "<repo_not_found>";
$creator = "<notfound>";
}
$perm .= ( $repos{$repo}{R}{'@all'} ? ' @R' : ( $repos{'@all'}{R}{$ENV{GL_USER}} ? ' #R' : ( $repos{$repo}{R}{$ENV{GL_USER}} ? ' R' : ' ' )));
$perm .= ( $repos{$repo}{W}{'@all'} ? ' @W' : ( $repos{'@all'}{W}{$ENV{GL_USER}} ? ' #W' : ( $repos{$repo}{W}{$ENV{GL_USER}} ? ' W' : ' ' )));
return($perm, $creator);
# set up for caching %repos
$last_repo = $repo;
return($perm, $creator, $wild);
}
}
# helper/convenience routine to get rights and ownership from a shell command
sub cli_repo_rights {
my ($perm, $creator) = &repo_rights($_[0]);
my ($perm, $creator, $wild) = &repo_rights($_[0]);
$perm =~ s/ /_/g;
$creator =~ s/^\(|\)$//g;
print "$perm $creator\n";
@ -443,6 +470,78 @@ sub special_cmd
}
}
# ----------------------------------------------------------------------------
# get memberships
# ----------------------------------------------------------------------------
# given a plain reponame or username, return:
# - the name itself, plus all the groups it belongs to if $GL_BIG_CONFIG is
# set
# OR
# - (for repos) if the name itself doesn't exist in the config, a wildcard
# matching it, plus all the groups that wildcard belongs to (again if
# $GL_BIG_CONFIG is set)
# A name can normally appear (repo example) (user example)
# - directly (repo foo) (RW = bar)
# - (only for repos) as a direct wildcard (repo foo/.*)
# but if $GL_BIG_CONFIG is set, it can also appear:
# - indirectly (@g = foo; repo @g) (@ug = bar; RW = @ug))
# - (only for repos) as an indirect wildcard (@g = foo/.*; repo @g).
# things that may not be obvious from the above:
# - the wildcard stuff does not apply to username memberships
# - for repos, wildcard appearances are TOTALLY ignored if a non-wild
# appearance (direct or indirect) exists
sub get_memberships {
my $base = shift; # reponame or username
my $is_repo = shift; # some true value means a repo name has been passed
my $wild = '';
my (@ret, @ret_w); # maintain wild matches separately from non-wild
# direct
push @ret, $base if not $is_repo or exists $repos{$base};
if ($is_repo and $GL_WILDREPOS and not @ret) {
for my $i (sort keys %repos) {
if ($base =~ /^$i$/) {
die "$ABRT $base matches $wild AND $i\n" if $wild and $wild ne $i;
$wild = $i;
# direct wildcard
push @ret_w, $i;
}
}
}
if ($GL_BIG_CONFIG) {
for my $g (sort keys %groups) {
for my $i (sort keys %{ $groups{$g} }) {
if ($base eq $i) {
# indirect
push @ret, $g;
} elsif ($is_repo and $GL_WILDREPOS and not @ret and $base =~ /^$i$/) {
die "$ABRT $base matches $wild AND $i\n" if $wild and $wild ne $i;
$wild = $i;
# indirect wildcard
push @ret_w, $g;
}
}
}
}
# deal with returning user info first
return (@ret) unless $is_repo;
# enforce the rule about ignoring all wildcard matches if a non-wild match
# exists while returning. (The @ret gating above does not adequately
# ensure this, it is only an optimisation).
#
# Also note that there is an extra return value when called for repos
# (compared to usernames)
return ((@ret ? '' : $wild), (@ret ? @ret : @ret_w));
}
# ----------------------------------------------------------------------------
# generic check access routine
# ----------------------------------------------------------------------------

View file

@ -28,6 +28,7 @@ our ($GL_LOGT, $GL_CONF_COMPILED, $REPO_BASE, $GIT_PATH, $REPO_UMASK, $GL_ADMIND
# and these are set by gitolite.pm
our ($R_COMMANDS, $W_COMMANDS, $REPONAME_PATT, $REPOPATT_PATT);
our %repos;
our %groups;
# the common setup module is in the same directory as this running program is
my $bindir = $0;
@ -55,6 +56,10 @@ my $repo_base_abs = $ENV{GL_REPO_BASE_ABS} = ( $REPO_BASE =~ m(^/) ? $REPO_BASE
# start...
# ----------------------------------------------------------------------------
# no arguments given? default user is $USER (fedorahosted works like this,
# and it is harmless for others)
@ARGV = ($ENV{USER}) unless @ARGV;
# if the first argument is a "-s", this user is allowed to get a shell using
# this key
my $shell_allowed = 0;
@ -184,7 +189,7 @@ $ENV{GL_REPO}=$repo;
# first level permissions check
# ----------------------------------------------------------------------------
my ($perm, $creator) = &repo_rights($repo);
my ($perm, $creator, $wild) = &repo_rights($repo);
if ($perm =~ /C/) {
# it was missing, and you have create perms
wrap_chdir("$repo_base_abs");

View file

@ -52,7 +52,7 @@ $Data::Dumper::Sortkeys = 1;
open STDOUT, ">", "/dev/null" if (@ARGV and shift eq '-q');
# these are set by the "rc" file
our ($GL_ADMINDIR, $GL_CONF, $GL_KEYDIR, $GL_CONF_COMPILED, $REPO_BASE, $REPO_UMASK, $PROJECTS_LIST, $GIT_PATH, $GL_WILDREPOS, $GL_GITCONFIG_KEYS, $GL_PACKAGE_HOOKS, $GL_SETPERMS_OVERRIDES_CONFIG);
our ($GL_ADMINDIR, $GL_CONF, $GL_KEYDIR, $GL_CONF_COMPILED, $REPO_BASE, $REPO_UMASK, $PROJECTS_LIST, $GIT_PATH, $GL_WILDREPOS, $GL_GITCONFIG_KEYS, $GL_PACKAGE_HOOKS, $GL_SETPERMS_OVERRIDES_CONFIG, $GL_BIG_CONFIG);
# and these are set by gitolite.pm
our ($REPONAME_PATT, $REPOPATT_PATT, $USERNAME_PATT, $AUTH_COMMAND, $AUTH_OPTIONS, $ABRT, $WARN);
@ -181,10 +181,12 @@ sub parse_conf_file
# repo(s)
elsif (/^repo (.*)/)
{
# grab the list and expand any @stuff in it
# grab the list...
@repos = split ' ', $1;
unless (@repos == 1 and $repos[0] eq '@all') {
@repos = expand_list ( @repos );
# ...expand groups in the default case
@repos = expand_list ( @repos ) unless $GL_BIG_CONFIG;
# ...sanity check
for (@repos) {
die "$ABRT bad reponame $_\n"
if ($GL_WILDREPOS and $_ !~ $REPOPATT_PATT);
@ -214,7 +216,7 @@ sub parse_conf_file
# expand the user list, unless it is just "@all"
@users = expand_list ( @users )
unless (@users == 1 and $users[0] eq '@all');
unless ($GL_BIG_CONFIG or (@users == 1 and $users[0] eq '@all'));
do { die "$ABRT bad username $_\n" unless $_ =~ $USERNAME_PATT } for @users;
s/\bCREAT[EO]R\b/~\$creator/g for @users;
@ -370,6 +372,7 @@ my $dumped_data = Data::Dumper->Dump([\%repos], [qw(*repos)]);
# much...
$dumped_data =~ s/'(?=[^']*\$(?:creator|readers|writers|gl_user))~*(.*?)'/"$1"/g;
print $compiled_fh $dumped_data;
print $compiled_fh Data::Dumper->Dump([\%groups], [qw(*groups)]) if $GL_BIG_CONFIG and %groups;
close $compiled_fh or die "$ABRT close compiled-conf failed: $!\n";
# ----------------------------------------------------------------------------
@ -406,8 +409,7 @@ wrap_chdir("$repo_base_abs");
for my $repo (sort keys %repos) {
next unless $repo =~ $REPONAME_PATT;
next if $repo =~ m(^EXTCMD/); # these are not real repos
next if $repo eq '@all';
next if $repo =~ m(^\@|EXTCMD/); # these are not real repos
unless (-d "$repo.git") {
print STDERR "creating $repo...\n";
new_repo($repo, "$GL_ADMINDIR/hooks/common");
@ -457,6 +459,7 @@ wrap_chdir("$repo_base_abs");
# daemons first...
for my $repo (sort keys %repos) {
next unless $repo =~ $REPONAME_PATT;
next if $repo =~ m(^\@|EXTCMD/); # these are not real repos
my $export_ok = "$repo.git/git-daemon-export-ok";
if ($repos{$repo}{'R'}{'daemon'}) {
system("touch $export_ok");
@ -469,6 +472,7 @@ my %projlist = ();
# ...then gitwebs
for my $repo (sort keys %repos) {
next unless $repo =~ $REPONAME_PATT;
next if $repo =~ m(^\@|EXTCMD/); # these are not real repos
my $desc_file = "$repo.git/description";
# note: having a description also counts as enabling gitweb
if ($repos{$repo}{'R'}{'gitweb'} or $desc{"$repo.git"}) {