From cf0e568c89bfd1df2e358336b0eedde19ff2632e Mon Sep 17 00:00:00 2001 From: Sitaram Chamarty Date: Mon, 10 May 2010 11:46:47 +0530 Subject: [PATCH 1/8] (big-config) the new "big-config" for large setups If you have many thousands of repos and users, neatly organised into groups, etc., the normal gitolite fails. (It actually runs out of memory very fast while doing the "compile" when you push the config, due to the number of combinations of repo/user being stored in the hash!) This commit series will stop doing that if you set $GL_BIG_CONFIG = 1 in the rc file. Some notes: - deny rules will still work but somewhat differently -- now they must be placed all together in one place to work like before. Ask me for details if you need to know before I get done with the docs - I've tested most of the important features, but not every single nuance - the update hook may be a tad less efficient now; we can try and tweak it later if needed but it shouldn't really hurt anything significantly even now - docs have not been written yet --- conf/example.gitolite.rc | 1 + hooks/common/update | 37 +++----- src/gitolite.pm | 185 ++++++++++++++++++++++++++++++--------- src/gl-auth-command | 7 +- src/gl-compile-conf | 16 ++-- 5 files changed, 170 insertions(+), 76 deletions(-) diff --git a/conf/example.gitolite.rc b/conf/example.gitolite.rc index ae4d077..ecaaaa5 100644 --- a/conf/example.gitolite.rc +++ b/conf/example.gitolite.rc @@ -1,4 +1,5 @@ # paths and configuration variables for gitolite +$GL_BIG_CONFIG = 1; # please read comments before editing diff --git a/hooks/common/update b/hooks/common/update index 0557c0a..f55e2cc 100755 --- a/hooks/common/update +++ b/hooks/common/update @@ -34,27 +34,12 @@ exit 0 if exists $ENV{GL_BYPASS_UPDATE_HOOK}; # we should already have the GL_RC env var set when we enter this hook die "parse $ENV{GL_RC} failed: " . ($! or $@) unless do $ENV{GL_RC}; -# then "do" the compiled config file, whose name we now know. 
Before doing -# that we setup the creator etc from environment variables so that the parse -# interpolates them. We've minimised the duplication but this *does* -# duplicate a bit of parse_acl from gitolite.pm; we don't want to include that -# file here just for that little bit -{ - our $creator = $ENV{GL_CREATOR}; - our $readers = $ENV{GL_READERS}; - our $writers = $ENV{GL_WRITERS}; - our $gl_user = $ENV{GL_USER}; - die "parse $GL_CONF_COMPILED failed: " . ($! or $@) unless do $GL_CONF_COMPILED; - - $repos{$ENV{GL_REPO}} = $repos{$ENV{GL_REPOPATT}} if ( $ENV{GL_REPOPATT} ); -} -my $reported_repo = $ENV{GL_REPO} . ( $ENV{GL_REPOPATT} ? " ($ENV{GL_REPOPATT})" : "" ); - -# we've started to need some common subs in what used to be a small, cute, -# little script that barely spanned a few lines :( require "$ENV{GL_BINDIR}/gitolite.pm"; +my ($perm, $creator, $wild) = &repo_rights($ENV{GL_REPO}); +my $reported_repo = $ENV{GL_REPO} . ( $ENV{GL_REPOPATT} ? " ($ENV{GL_REPOPATT})" : "" ); + # ---------------------------------------------------------------------------- # start... # ---------------------------------------------------------------------------- @@ -71,17 +56,17 @@ chomp($merge_base = `git merge-base $oldsha $newsha`) unless $oldsha eq '0' x 40 or $newsha eq '0' x 40; -# what are you trying to do? (is it 'W' or '+'?) -my $perm = 'W'; +# att_acc == attempted access -- what are you trying to do? (is it 'W' or '+'?) +my $att_acc = 'W'; # rewriting a tag is considered a rewind, in terms of permissions -$perm = '+' if $ref =~ m(refs/tags/) and $oldsha ne ('0' x 40); +$att_acc = '+' if $ref =~ m(refs/tags/) and $oldsha ne ('0' x 40); # non-ff push to ref # notice that ref delete looks like a rewind, as it should -$perm = '+' if $oldsha ne $merge_base; +$att_acc = '+' if $oldsha ne $merge_base; # were any 'D' perms specified? 
If they were, it means we have to separate # deletes from rewinds, so if the new sha is all 0's, change the '+' to a 'D' -$perm = 'D' if ( $repos{$ENV{GL_REPO}}{DELETE_IS_D} or $repos{'@all'}{DELETE_IS_D} ) and $newsha eq '0' x 40; +$att_acc = 'D' if ( $repos{$ENV{GL_REPO}}{DELETE_IS_D} or $repos{'@all'}{DELETE_IS_D} ) and $newsha eq '0' x 40; my @allowed_refs; # @all repos: see comments in similar code in check_access @@ -113,12 +98,12 @@ if (exists $repos{$ENV{GL_REPO}}{NAME_LIMITS}) { # log is the *first* one (which is a *real* ref, like refs/heads/master), # while all the rest (if they exist) are like NAME/something. So we do the # first one separately to capture it, then run the rest (if any) -my $log_refex = check_ref(\@allowed_refs, $ENV{GL_REPO}, (shift @refs), $perm); -&check_ref (\@allowed_refs, $ENV{GL_REPO}, $_ , $perm) for @refs; +my $log_refex = check_ref(\@allowed_refs, $ENV{GL_REPO}, (shift @refs), $att_acc); +&check_ref (\@allowed_refs, $ENV{GL_REPO}, $_ , $att_acc) for @refs; # if we returned at all, all the checks succeeded, so we log the action and exit 0 -&log_it("$ENV{GL_TS} $perm\t" . +&log_it("$ENV{GL_TS} $att_acc\t" . substr($oldsha, 0, 14) . "\t" . substr($newsha, 0, 14) . "\t$reported_repo\t$ref\t$ENV{GL_USER}\t$log_refex\n"); diff --git a/src/gitolite.pm b/src/gitolite.pm index 0d64f17..ff1ca5d 100644 --- a/src/gitolite.pm +++ b/src/gitolite.pm @@ -1,4 +1,7 @@ use strict; +use Data::Dumper; +$Data::Dumper::Deepcopy = 1; + # this file is commonly used using "require". 
It is not required to use "use" # (because it doesn't live in a different package) @@ -34,8 +37,9 @@ our $USERNAME_PATT=qr(^\@?[0-9a-zA-Z][0-9a-zA-Z._\@+-]*$); # very simple patter our $REPOPATT_PATT=qr(^\@?[0-9a-zA-Z][\\^.$|()[\]*+?{}0-9a-zA-Z._\@/-]*$); # these come from the RC file -our ($REPO_UMASK, $GL_WILDREPOS, $GL_PACKAGE_CONF, $GL_PACKAGE_HOOKS, $REPO_BASE, $GL_CONF_COMPILED); +our ($REPO_UMASK, $GL_WILDREPOS, $GL_PACKAGE_CONF, $GL_PACKAGE_HOOKS, $REPO_BASE, $GL_CONF_COMPILED, $GL_BIG_CONFIG); our %repos; +our %groups; # ---------------------------------------------------------------------------- # convenience subs @@ -253,24 +257,39 @@ sub parse_acl # want the config dumped as is, really return unless $repo; - # return with "no wildcard match" status if you found the actual repo in - # the config or if wild is unset - return $ENV{GL_REPOPATT} = "" if $repos{$repo} or not $GL_WILDREPOS; + my ($wild, @repo_plus, @user_plus); + # expand $repo and $gl_user into all possible matching values + ($wild, @repo_plus) = &get_memberships($repo, 1); + ( @user_plus) = &get_memberships($gl_user, 0); + # XXX testing notes: the above should return just one entry during + # non-BC usage, whether wild or not + die "assert 1 failed" if (@repo_plus > 1 and $repo_plus[-1] ne '@all' + or @repo_plus > 2) and not $GL_BIG_CONFIG; - # didn't find actual repo in %repos, and wild is set, so find the repo - # pattern that matches the actual repo - my @matched = grep { $repo =~ /^$_$/ } sort keys %repos; + # the old "convenience copy" thing. Now on steroids :) - # didn't find a match? 
avoid leaking info to user about repo existence; - # as before, pretend "no wildcard match" status - return $ENV{GL_REPOPATT} = "" unless @matched; + # note that when copying the @all entry, we retain the destination name as + # @all; we dont change it to $repo or $gl_user + for my $r ('@all', @repo_plus) { + my $dr = $repo; $dr = '@all' if $r eq '@all'; + $repos{$dr}{DELETE_IS_D} = 1 if $repos{$r}{DELETE_IS_D}; + $repos{$dr}{NAME_LIMITS} = 1 if $repos{$r}{NAME_LIMITS}; - die "$repo has multiple matches\n@matched\n" if @matched > 1; + for my $u ('@all', @user_plus) { + my $du = $gl_user; $du = '@all' if $u eq '@all'; + $repos{$dr}{C}{$du} = 1 if $repos{$r}{C}{$u}; + $repos{$dr}{R}{$du} = 1 if $repos{$r}{R}{$u}; + $repos{$dr}{W}{$du} = 1 if $repos{$r}{W}{$u}; - # found exactly one pattern that matched, copy its ACL for convenience - $repos{$repo} = $repos{$matched[0]}; - # and return the pattern - return $ENV{GL_REPOPATT} = $matched[0]; + next if $r eq $dr and $u eq $du; # no point duplicating those refexes + push @{ $repos{$dr}{$du} }, @{ $repos{$r}{$u} } + if exists $repos{$r}{$u} and ref($repos{$r}{$u}) eq 'ARRAY'; + } + } + + $ENV{GL_REPOPATT} = ""; + $ENV{GL_REPOPATT} = $wild if $wild and $GL_WILDREPOS; + return ($wild); } # ---------------------------------------------------------------------------- @@ -295,6 +314,11 @@ sub report_basic &report_version($GL_ADMINDIR, $user); print "\rthe gitolite config gives you the following access:\r\n"; for my $r (sort keys %repos) { + if ($r =~ $REPONAME_PATT) { + &parse_acl($GL_CONF_COMPILED, $r, "NOBODY", "NOBODY", "NOBODY"); + } else { + &parse_acl($GL_CONF_COMPILED, $r, $ENV{GL_USER}, "NOBODY", "NOBODY"); + } # @all repos; meaning of read/write flags: # @R => @all users are allowed access to this repo # #R => you're a super user and can see @all repos @@ -331,7 +355,7 @@ sub expand_wild # actual_repo has to match the pattern being expanded next unless $actual_repo =~ /$repo/; - my($perm, $creator) = 
&repo_rights($actual_repo); + my($perm, $creator, $wild) = &repo_rights($actual_repo); next unless $perm =~ /\S/; print "$perm\t$creator\t$actual_repo\n"; } @@ -342,64 +366,67 @@ sub expand_wild # how/why). Regardless of how we're called, we assume $ENV{GL_USER} is # already defined { - my %normal_repos; - + my $last_repo = ''; sub repo_rights { my $repo = shift; $repo =~ s/^\.\///; $repo =~ s/\.git$//; + return if $last_repo eq $repo; # a wee bit o' caching, though not yet needed + # we get passed an actual repo name. It may be a normal # (non-wildcard) repo, in which case it is assumed to exist. If it's # a wildrepo, it may or may not exist. If it doesn't exist, the "C" # perms are also filled in, else that column is left blank - unless (%normal_repos) { - unless ($REPO_BASE) { - # means we've been called from outside - &where_is_rc(); - die "parse $ENV{GL_RC} failed: " . ($! or $@) unless do $ENV{GL_RC}; - } - - &parse_acl($GL_CONF_COMPILED, "", "NOBODY", "NOBODY", "NOBODY"); - %normal_repos = %repos; + unless ($REPO_BASE) { + # means we've been called from outside; see doc/admin-defined-commands.mkd + &where_is_rc(); + die "parse $ENV{GL_RC} failed: " . ($! 
or $@) unless do $ENV{GL_RC}; } - my $creator; my $perm = ' '; + my $creator; - # if repo is present "as is" in the config, those permissions will - # override anything inherited from a wildcard that may have matched - if ($normal_repos{$repo}) { - %repos = %normal_repos; - $creator = ''; - } elsif ( -d "$ENV{GL_REPO_BASE_ABS}/$repo.git" ) { - # must be a wildrepo, and it has already been created; find the - # creator and subsitute in repos + # get basic info about the repo and fill %repos + my $wild = ''; + my $exists = -d "$ENV{GL_REPO_BASE_ABS}/$repo.git"; + if ($exists) { + # these will be empty if it's not a wildcard repo anyway my ($read, $write); ($creator, $read, $write) = &wild_repo_rights($ENV{GL_REPO_BASE_ABS}, $repo, $ENV{GL_USER}); # get access list with these substitutions - &parse_acl($GL_CONF_COMPILED, $repo, $creator || "NOBODY", $read || "NOBODY", $write || "NOBODY"); + $wild = &parse_acl($GL_CONF_COMPILED, $repo, $creator || "NOBODY", $read || "NOBODY", $write || "NOBODY"); + } else { + $wild = &parse_acl($GL_CONF_COMPILED, $repo, $ENV{GL_USER}, "NOBODY", "NOBODY"); + } + + if ($exists and not $wild) { + $creator = ''; + } elsif ($exists) { + # is a wildrepo, and it has already been created $creator = "($creator)"; } else { - # repo didn't exist; C perms also need to be filled in after - # getting access list with only creator filled in - &parse_acl($GL_CONF_COMPILED, $repo, $ENV{GL_USER}, "NOBODY", "NOBODY"); + # repo didn't exist; C perms need to be filled in $perm = ( $repos{$repo}{C}{'@all'} ? ' @C' : ( $repos{$repo}{C}{$ENV{GL_USER}} ? ' =C' : ' ' )) if $GL_WILDREPOS; # if you didn't have perms to create it, delete the "convenience" # copy of the ACL that parse_acl makes delete $repos{$repo} unless $perm =~ /C/; - $creator = ""; + $creator = ""; } $perm .= ( $repos{$repo}{R}{'@all'} ? ' @R' : ( $repos{'@all'}{R}{$ENV{GL_USER}} ? ' #R' : ( $repos{$repo}{R}{$ENV{GL_USER}} ? ' R' : ' ' ))); $perm .= ( $repos{$repo}{W}{'@all'} ? 
' @W' : ( $repos{'@all'}{W}{$ENV{GL_USER}} ? ' #W' : ( $repos{$repo}{W}{$ENV{GL_USER}} ? ' W' : ' ' ))); - return($perm, $creator); + + # set up for caching %repos + $last_repo = $repo; + + return($perm, $creator, $wild); } } # helper/convenience routine to get rights and ownership from a shell command sub cli_repo_rights { - my ($perm, $creator) = &repo_rights($_[0]); + my ($perm, $creator, $wild) = &repo_rights($_[0]); $perm =~ s/ /_/g; $creator =~ s/^\(|\)$//g; print "$perm $creator\n"; @@ -443,6 +470,78 @@ sub special_cmd } } +# ---------------------------------------------------------------------------- +# get memberships +# ---------------------------------------------------------------------------- + +# given a plain reponame or username, return: +# - the name itself, plus all the groups it belongs to if $GL_BIG_CONFIG is +# set +# OR +# - (for repos) if the name itself doesn't exist in the config, a wildcard +# matching it, plus all the groups that wildcard belongs to (again if +# $GL_BIG_CONFIG is set) + +# A name can normally appear (repo example) (user example) +# - directly (repo foo) (RW = bar) +# - (only for repos) as a direct wildcard (repo foo/.*) +# but if $GL_BIG_CONFIG is set, it can also appear: +# - indirectly (@g = foo; repo @g) (@ug = bar; RW = @ug)) +# - (only for repos) as an indirect wildcard (@g = foo/.*; repo @g). 
+# things that may not be obvious from the above: +# - the wildcard stuff does not apply to username memberships +# - for repos, wildcard appearances are TOTALLY ignored if a non-wild +# appearance (direct or indirect) exists + +sub get_memberships { + my $base = shift; # reponame or username + my $is_repo = shift; # some true value means a repo name has been passed + + my $wild = ''; + my (@ret, @ret_w); # maintain wild matches separately from non-wild + + # direct + push @ret, $base if not $is_repo or exists $repos{$base}; + if ($is_repo and $GL_WILDREPOS and not @ret) { + for my $i (sort keys %repos) { + if ($base =~ /^$i$/) { + die "$ABRT $base matches $wild AND $i\n" if $wild and $wild ne $i; + $wild = $i; + # direct wildcard + push @ret_w, $i; + } + } + } + + if ($GL_BIG_CONFIG) { + for my $g (sort keys %groups) { + for my $i (sort keys %{ $groups{$g} }) { + if ($base eq $i) { + # indirect + push @ret, $g; + } elsif ($is_repo and $GL_WILDREPOS and not @ret and $base =~ /^$i$/) { + die "$ABRT $base matches $wild AND $i\n" if $wild and $wild ne $i; + $wild = $i; + # indirect wildcard + push @ret_w, $g; + } + } + } + } + + # deal with returning user info first + return (@ret) unless $is_repo; + + # enforce the rule about ignoring all wildcard matches if a non-wild match + # exists while returning. (The @ret gating above does not adequately + # ensure this, it is only an optimisation). + # + # Also note that there is an extra return value when called for repos + # (compared to usernames) + + return ((@ret ? '' : $wild), (@ret ? 
@ret : @ret_w)); +} + # ---------------------------------------------------------------------------- # generic check access routine # ---------------------------------------------------------------------------- diff --git a/src/gl-auth-command b/src/gl-auth-command index b888af0..7e7faff 100755 --- a/src/gl-auth-command +++ b/src/gl-auth-command @@ -28,6 +28,7 @@ our ($GL_LOGT, $GL_CONF_COMPILED, $REPO_BASE, $GIT_PATH, $REPO_UMASK, $GL_ADMIND # and these are set by gitolite.pm our ($R_COMMANDS, $W_COMMANDS, $REPONAME_PATT, $REPOPATT_PATT); our %repos; +our %groups; # the common setup module is in the same directory as this running program is my $bindir = $0; @@ -55,6 +56,10 @@ my $repo_base_abs = $ENV{GL_REPO_BASE_ABS} = ( $REPO_BASE =~ m(^/) ? $REPO_BASE # start... # ---------------------------------------------------------------------------- +# no arguments given? default user is $USER (fedorahosted works like this, +# and it is harmless for others) +@ARGV = ($ENV{USER}) unless @ARGV; + # if the first argument is a "-s", this user is allowed to get a shell using # this key my $shell_allowed = 0; @@ -184,7 +189,7 @@ $ENV{GL_REPO}=$repo; # first level permissions check # ---------------------------------------------------------------------------- -my ($perm, $creator) = &repo_rights($repo); +my ($perm, $creator, $wild) = &repo_rights($repo); if ($perm =~ /C/) { # it was missing, and you have create perms wrap_chdir("$repo_base_abs"); diff --git a/src/gl-compile-conf b/src/gl-compile-conf index 220d4b8..0f761d0 100755 --- a/src/gl-compile-conf +++ b/src/gl-compile-conf @@ -52,7 +52,7 @@ $Data::Dumper::Sortkeys = 1; open STDOUT, ">", "/dev/null" if (@ARGV and shift eq '-q'); # these are set by the "rc" file -our ($GL_ADMINDIR, $GL_CONF, $GL_KEYDIR, $GL_CONF_COMPILED, $REPO_BASE, $REPO_UMASK, $PROJECTS_LIST, $GIT_PATH, $GL_WILDREPOS, $GL_GITCONFIG_KEYS, $GL_PACKAGE_HOOKS, $GL_SETPERMS_OVERRIDES_CONFIG); +our ($GL_ADMINDIR, $GL_CONF, $GL_KEYDIR, $GL_CONF_COMPILED, 
$REPO_BASE, $REPO_UMASK, $PROJECTS_LIST, $GIT_PATH, $GL_WILDREPOS, $GL_GITCONFIG_KEYS, $GL_PACKAGE_HOOKS, $GL_SETPERMS_OVERRIDES_CONFIG, $GL_BIG_CONFIG); # and these are set by gitolite.pm our ($REPONAME_PATT, $REPOPATT_PATT, $USERNAME_PATT, $AUTH_COMMAND, $AUTH_OPTIONS, $ABRT, $WARN); @@ -181,10 +181,12 @@ sub parse_conf_file # repo(s) elsif (/^repo (.*)/) { - # grab the list and expand any @stuff in it + # grab the list... @repos = split ' ', $1; unless (@repos == 1 and $repos[0] eq '@all') { - @repos = expand_list ( @repos ); + # ...expand groups in the default case + @repos = expand_list ( @repos ) unless $GL_BIG_CONFIG; + # ...sanity check for (@repos) { die "$ABRT bad reponame $_\n" if ($GL_WILDREPOS and $_ !~ $REPOPATT_PATT); @@ -214,7 +216,7 @@ sub parse_conf_file # expand the user list, unless it is just "@all" @users = expand_list ( @users ) - unless (@users == 1 and $users[0] eq '@all'); + unless ($GL_BIG_CONFIG or (@users == 1 and $users[0] eq '@all')); do { die "$ABRT bad username $_\n" unless $_ =~ $USERNAME_PATT } for @users; s/\bCREAT[EO]R\b/~\$creator/g for @users; @@ -370,6 +372,7 @@ my $dumped_data = Data::Dumper->Dump([\%repos], [qw(*repos)]); # much... $dumped_data =~ s/'(?=[^']*\$(?:creator|readers|writers|gl_user))~*(.*?)'/"$1"/g; print $compiled_fh $dumped_data; +print $compiled_fh Data::Dumper->Dump([\%groups], [qw(*groups)]) if $GL_BIG_CONFIG and %groups; close $compiled_fh or die "$ABRT close compiled-conf failed: $!\n"; # ---------------------------------------------------------------------------- @@ -406,8 +409,7 @@ wrap_chdir("$repo_base_abs"); for my $repo (sort keys %repos) { next unless $repo =~ $REPONAME_PATT; - next if $repo =~ m(^EXTCMD/); # these are not real repos - next if $repo eq '@all'; + next if $repo =~ m(^\@|EXTCMD/); # these are not real repos unless (-d "$repo.git") { print STDERR "creating $repo...\n"; new_repo($repo, "$GL_ADMINDIR/hooks/common"); @@ -457,6 +459,7 @@ wrap_chdir("$repo_base_abs"); # daemons first... 
for my $repo (sort keys %repos) { next unless $repo =~ $REPONAME_PATT; + next if $repo =~ m(^\@|EXTCMD/); # these are not real repos my $export_ok = "$repo.git/git-daemon-export-ok"; if ($repos{$repo}{'R'}{'daemon'}) { system("touch $export_ok"); @@ -469,6 +472,7 @@ my %projlist = (); # ...then gitwebs for my $repo (sort keys %repos) { next unless $repo =~ $REPONAME_PATT; + next if $repo =~ m(^\@|EXTCMD/); # these are not real repos my $desc_file = "$repo.git/description"; # note: having a description also counts as enabling gitweb if ($repos{$repo}{'R'}{'gitweb'} or $desc{"$repo.git"}) { From c8f83a03dd422370882abbc2ef0f9e315af74c96 Mon Sep 17 00:00:00 2001 From: Sitaram Chamarty Date: Fri, 14 May 2010 20:40:59 +0530 Subject: [PATCH 2/8] (big-config) compile: create new repos even if GL_BIG_CONFIG is set ...by expanding the groups of course --- src/gl-compile-conf | 48 +++++++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/src/gl-compile-conf b/src/gl-compile-conf index 0f761d0..dd96381 100755 --- a/src/gl-compile-conf +++ b/src/gl-compile-conf @@ -405,27 +405,37 @@ die "\n\t\t***** AAARGH! *****\n" . # so if it was not already absolute, prefix $HOME. my $repo_base_abs = ( $REPO_BASE =~ m(^/) ? $REPO_BASE : "$ENV{HOME}/$REPO_BASE" ); -wrap_chdir("$repo_base_abs"); +{ + wrap_chdir("$repo_base_abs"); -for my $repo (sort keys %repos) { - next unless $repo =~ $REPONAME_PATT; - next if $repo =~ m(^\@|EXTCMD/); # these are not real repos - unless (-d "$repo.git") { - print STDERR "creating $repo...\n"; - new_repo($repo, "$GL_ADMINDIR/hooks/common"); - # new_repo would have chdir'd us away; come back - wrap_chdir("$repo_base_abs"); - } + # autocreate repos. 
Start with the ones that are normal repos in %repos + my @repos = grep { $_ =~ $REPONAME_PATT and not /^@/ } sort keys %repos; + # then, for each repogroup, find the members of the group and add them in + map { push @repos, keys %{ $groups{$_} } } grep { /^@/ } keys %repos; + # weed out duplicates (the code in the loop below is disk activity!) + my %seen = map { $_ => 1 } @repos; + @repos = sort keys %seen; - # when repos are copied over from elsewhere, one had to run easy install - # once again to make the new (OS-copied) repo contain the proper update - # hook. Perhaps we can make this easier now, and eliminate the easy - # install, with a quick check (and a new, empty, "hook" as a sentinel) - unless (-l "$repo.git/hooks/gitolite-hooked") { - ln_sf("$GL_ADMINDIR/hooks/common", "*", "$repo.git/hooks"); - # in case of package install, GL_ADMINDIR is no longer the top cop; - # override with the package hooks - ln_sf("$GL_PACKAGE_HOOKS/common", "*", "$repo.git/hooks") if $GL_PACKAGE_HOOKS; + for my $repo (sort @repos) { + next unless $repo =~ $REPONAME_PATT; + next if $repo =~ m(^\@|EXTCMD/); # these are not real repos + unless (-d "$repo.git") { + print STDERR "creating $repo...\n"; + new_repo($repo, "$GL_ADMINDIR/hooks/common"); + # new_repo would have chdir'd us away; come back + wrap_chdir("$repo_base_abs"); + } + + # when repos are copied over from elsewhere, one had to run easy install + # once again to make the new (OS-copied) repo contain the proper update + # hook. 
Perhaps we can make this easier now, and eliminate the easy + # install, with a quick check (and a new, empty, "hook" as a sentinel) + unless (-l "$repo.git/hooks/gitolite-hooked") { + ln_sf("$GL_ADMINDIR/hooks/common", "*", "$repo.git/hooks"); + # in case of package install, GL_ADMINDIR is no longer the top cop; + # override with the package hooks + ln_sf("$GL_PACKAGE_HOOKS/common", "*", "$repo.git/hooks") if $GL_PACKAGE_HOOKS; + } } } From 0139fe0e9716d7035ce81a19a0987e4119234b43 Mon Sep 17 00:00:00 2001 From: Sitaram Chamarty Date: Fri, 14 May 2010 18:20:31 +0530 Subject: [PATCH 3/8] (big-config) compile: dont complain about "@foo" not having a pubkey --- src/gl-compile-conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gl-compile-conf b/src/gl-compile-conf index dd96381..9dbb6fb 100755 --- a/src/gl-compile-conf +++ b/src/gl-compile-conf @@ -576,7 +576,7 @@ for my $pubkey (`find . -type f`) # lint check 3; a little more severe than the first two I guess... for my $user (sort keys %user_list) { - next if $user =~ /^(gitweb|daemon|\@all|~\$creator|\$readers|\$writers)$/ or $user_list{$user} eq 'has pubkey'; + next if $user =~ /^(gitweb|daemon|\@.*|~\$creator|\$readers|\$writers)$/ or $user_list{$user} eq 'has pubkey'; print STDERR "$WARN user $user in config, but has no pubkey!\n"; } From d11a27924b83c8b795d445eb2ace9fddc3041a42 Mon Sep 17 00:00:00 2001 From: Sitaram Chamarty Date: Wed, 12 May 2010 22:09:51 +0530 Subject: [PATCH 4/8] (big-config) compile: fragments in big-config Since it is possible to do all sorts of shenanigans with wildcards and repo groups, we - allow only a fragment called "foo" to set permissions for a group called "@foo", in addition to a repo called "foo" - forbid defining any groups within a fragment conf. All "@foo = bar baz" must be done in the main config file now. If this proves too limiting for anyone I'll worry about it then. 
--- src/gl-compile-conf | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/gl-compile-conf b/src/gl-compile-conf index 9dbb6fb..5c02160 100755 --- a/src/gl-compile-conf +++ b/src/gl-compile-conf @@ -173,6 +173,8 @@ sub parse_conf_file # user or repo groups if (/^(@\S+) = (.*)/) { + die "$ABRT defining groups is not allowed inside fragments\n" + if $GL_BIG_CONFIG and $fragment ne 'master'; # store the members of each group as hash key. Keep track of when # the group was *first* created by using $fragment as the *value* do { $groups{$1}{$_} ||= $fragment } for ( expand_list( split(' ', $2) ) ); @@ -239,6 +241,8 @@ sub parse_conf_file # fragment is also called 'foo' (you're allowed to have a # fragment that is only concerned with one repo) ( $fragment eq $repo ) or + # same thing in big-config-land; foo is just @foo now + ( $GL_BIG_CONFIG and "\@$fragment" eq $repo ) or # fragment is called "bar" and "@bar = foo" has been # defined in the master config ( ($groups{"\@$fragment"}{$repo} || '') eq 'master' ) From 8da223f92a130256a7640d89b1508c2b1ef55ab4 Mon Sep 17 00:00:00 2001 From: Sitaram Chamarty Date: Fri, 14 May 2010 16:35:37 +0530 Subject: [PATCH 5/8] (big-config) allow usergroup information to be passed in from outside [Please NOTE: this is all about *user* groups, not *repo* groups] SUMMARY: gl-auth-commmand can now take an optional list of usergroup names after the first argument (which is the username). 
See doc/big-config.mkd in the next commit or so --- src/gitolite.pm | 8 +++++++- src/gl-auth-command | 4 ++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/gitolite.pm b/src/gitolite.pm index ff1ca5d..9501257 100644 --- a/src/gitolite.pm +++ b/src/gitolite.pm @@ -530,7 +530,13 @@ sub get_memberships { } # deal with returning user info first - return (@ret) unless $is_repo; + unless ($is_repo) { + # add in group membership info sent in via second and subsequent + # arguments to gl-auth-command; be sure to prefix the "@" sign to each + # of them! + push @ret, map { s/^/@/; $_; } split(' ', $ENV{GL_GROUP_LIST}) if $ENV{GL_GROUP_LIST}; + return (@ret); + } # enforce the rule about ignoring all wildcard matches if a non-wild match # exists while returning. (The @ret gating above does not adequately diff --git a/src/gl-auth-command b/src/gl-auth-command index 7e7faff..80fd393 100755 --- a/src/gl-auth-command +++ b/src/gl-auth-command @@ -71,6 +71,10 @@ if ($ARGV[0] eq '-s') { # first, fix the biggest gripe I have with gitosis, a 1-line change my $user=$ENV{GL_USER}=shift; # there; now that's available everywhere! 
+# if there are any more arguments, they're a list of group names that the user +# is a member of +$ENV{GL_GROUP_LIST} = join(" ", @ARGV) if @ARGV; + # ---------------------------------------------------------------------------- # logging, timestamp env vars # ---------------------------------------------------------------------------- From 58fc6a32529e639d60a70a96fbc32f6201e20ca6 Mon Sep 17 00:00:00 2001 From: Sitaram Chamarty Date: Fri, 14 May 2010 18:20:57 +0530 Subject: [PATCH 6/8] (big-config) documentation --- doc/big-config.mkd | 200 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 200 insertions(+) create mode 100644 doc/big-config.mkd diff --git a/doc/big-config.mkd b/doc/big-config.mkd new file mode 100644 index 0000000..a1fe0ad --- /dev/null +++ b/doc/big-config.mkd @@ -0,0 +1,200 @@ +# what is a "big-config" + +In this document: + + * when/why do we need it? + * how do we use it? + * what are the downsides? + * (extra coolness) usergroups and LDAP/similar tools + +### when/why do we need it? + +A "big config" is anything that has more than a few thousand users and a few +thousand repos, organised into groups that are much smaller in number (like +maybe a few hundreds of repogroups and a few dozens of usergroups). 
+ +So let's say you have + + @wbr = lynx firefox + @devs = alice bob + + repo @wbr + RW+ next = @devs + RW master = @devs + +Gitolite internally translates this to + + repo lynx firefox + RW+ next = alice bob + RW master = alice bob + +Not just that -- it now generates the actual config rules once for each +user-repo-ref combination (there are 8 combinations above; the compiled config +file looks partly like this: + + %repos = ( + 'firefox' => { + 'R' => { + 'alice' => 1, + 'bob' => 1 + }, + 'W' => { + 'alice' => 1, + 'bob' => 1 + }, + 'alice' => [ + { + 'refs/heads/next' => 'RW+' + }, + { + 'refs/heads/master' => 'RW' + } + ], + 'bob' => [ + { + 'refs/heads/next' => 'RW+' + }, + { + 'refs/heads/master' => 'RW' + } + ] + }, + 'lynx' => { + 'R' => { + 'alice' => 1, + 'bob' => 1 + }, + 'W' => { + 'alice' => 1, + 'bob' => 1 + }, + 'alice' => [ + { + 'refs/heads/next' => 'RW+' + }, + { + 'refs/heads/master' => 'RW' + } + ], + 'bob' => [ + { + 'refs/heads/next' => 'RW+' + }, + { + 'refs/heads/master' => 'RW' + } + ] + } + ); + +Phew! + +You can imagine what that does when you have 10,000 users and 10,000 repos. +Let's just say it's not pretty :) + +### how do we use it? + +Now, if you had all those 10,000 users and repos explicitly listed (no +groups), then there is no help. But if, like the above example, you had +groups like we used above, there is hope. + +Just set + + $GL_BIG_CONFIG = 1; + +in the `~/.gitolite.rc` file on the server. When you do that, and push this +configuration, the compiled file looks like this: + + %repos = ( + '@wbr' => { + '@devs' => [ + { + 'refs/heads/next' => 'RW+' + }, + { + 'refs/heads/master' => 'RW' + } + ], + 'R' => { + '@devs' => 1 + }, + 'W' => { + '@devs' => 1 + } + }, + ); + %groups = ( + '@devs' => { + 'alice' => 'master', + 'bob' => 'master' + }, + '@wbr' => { + 'firefox' => 'master', + 'lynx' => 'master' + } + ); + +That's a lot smaller, and allows orders of magintude more repos and groups to +be supported. 
+ +### what are the downsides? + +There are a few minor issues. + +First, "deny" rules (rules whose "permission" is a "-" sign), will not work +exactly the same as before. + +[TODO: add a nice example etc...] + +Second, if you use the delegation feature, you can no longer define or extend +@groups in a fragment, for security reasons. It will also not let you use any +group other than the @fragname itself (specifically, groups which contained a +subset of the allowed @fragname, which would work normally, do not work now). + +(If you didn't understand all that, you're probably not using delegation, so +feel free to ignore it!) + +### (extra coolness) usergroups and LDAP/similar tools + +[Please NOTE: this is all about *user* groups, not *repo* groups] + +Gitolite now allows usergroup information to be passed in from outside. The +`gl-auth-command` can now take an optional list of usergroup names after the +first argument (which is the username). + +To understand why this is useful, consider the following: + +Some people have an LDAP-backed ssh daemon (or some other similar mechanism +that can speak "ssh" to the client), with pubkeys stored in LDAP, etc., and +some way (not using `~/.ssh/authorized_keys`) of invoking gitolite. + +Such setups also have "usergroups", and a way to tell, for each user, what +groups he/she is a member of. So let's say "alice" works on projects "foo" +and "bar", while "bob" works on project "bar" and is a member of the +`L3_support` team. + +You can use those group names in the gitolite config file for access control +as "@foo", "@bar", `@L3_support`, etc.; please note the "@" prefix because +gitolite requires it. + +However, that still leaves a wee little inconvenience. You still have to add +lines like this to the gitolite config file: + + @foo = alice + @bar = alice bob + @L3_support = bob + +You don't need to do that anymore now. 
Tell your authentication system that, +after authenticating alice, instead of running: + + /some/path/to/gl-auth-command alice + +it should first find the groups that alice is a member of, and then run: + + /some/path/to/gl-auth-command alice foo bar + +That's it. Internally, gitolite will behave as if the config file had also +specified: + + @foo = alice + @bar = alice From 35750c1abec18e38d1452c756b8c5e59d861308f Mon Sep 17 00:00:00 2001 From: Sitaram Chamarty Date: Sun, 16 May 2010 06:18:08 +0530 Subject: [PATCH 7/8] (big-config) update doc and rc, allow skipping gitweb/daemon skipping gitweb/daemon has an enormous impact on speed of an admin-push! --- conf/example.gitolite.rc | 7 +++- doc/big-config.mkd | 25 +++++++++-- src/gl-compile-conf | 90 ++++++++++++++++++++-------------------- 3 files changed, 74 insertions(+), 48 deletions(-) diff --git a/conf/example.gitolite.rc b/conf/example.gitolite.rc index ecaaaa5..4821596 100644 --- a/conf/example.gitolite.rc +++ b/conf/example.gitolite.rc @@ -1,5 +1,4 @@ # paths and configuration variables for gitolite -$GL_BIG_CONFIG = 1; # please read comments before editing @@ -83,7 +82,13 @@ $GIT_PATH=""; # -------------------------------------- +# ---------------------------------------------------------------------- +# BIG CONFIG SETTINGS +# Please read doc/big-config.mkd for details + +$GL_BIG_CONFIG = 0; +$GL_NO_DAEMON_NO_GITWEB = 0; # ---------------------------------------------------------------------- # SECURITY SENSITIVE SETTINGS diff --git a/doc/big-config.mkd b/doc/big-config.mkd index a1fe0ad..5ff551f 100644 --- a/doc/big-config.mkd +++ b/doc/big-config.mkd @@ -4,14 +4,15 @@ In this document: * when/why do we need it? * how do we use it? + * summary of settings in RC file * what are the downsides? * (extra coolness) usergroups and LDAP/similar tools ### when/why do we need it? 
-A "big config" is anything that has more than a few thousand users and a few -thousand repos, organised into groups that are much smaller in number (like -maybe a few hundreds of repogroups and a few dozens of usergroups). +A "big config" is anything that has a few thousand users and a few thousand +repos, organised into groups that are much smaller in number (like maybe a few +hundreds of repogroups and a few dozens of usergroups). So let's say you have @@ -137,6 +138,24 @@ configuration, the compiled file looks like this: That's a lot smaller, and allows orders of magintude more repos and groups to be supported. +### summary of settings in RC file + +The default RC file contains the following lines: + + $GL_BIG_CONFIG = 0; + $GL_NO_DAEMON_NO_GITWEB = 0; + +The first setting means that by default, big-config is off; you can change it +to 1 to enable it. + +The second is a very useful optimisation that you *must* enable if you *do* +have a large number of repositories, and do *not* use gitolite's support for +gitweb or git-daemon access (see "[easier to specify gitweb description and +gitweb/daemon access][gw]" for details). This will save a lot of time when +you push the gitolite-admin repo with changes. + +[gw]: http://github.com/sitaramc/gitolite/blob/pu/doc/3-faq-tips-etc.mkd#gitweb + ### what are the downsides? There are a few minor issues. 
diff --git a/src/gl-compile-conf b/src/gl-compile-conf index 5c02160..e6d1268 100755 --- a/src/gl-compile-conf +++ b/src/gl-compile-conf @@ -52,7 +52,7 @@ $Data::Dumper::Sortkeys = 1; open STDOUT, ">", "/dev/null" if (@ARGV and shift eq '-q'); # these are set by the "rc" file -our ($GL_ADMINDIR, $GL_CONF, $GL_KEYDIR, $GL_CONF_COMPILED, $REPO_BASE, $REPO_UMASK, $PROJECTS_LIST, $GIT_PATH, $GL_WILDREPOS, $GL_GITCONFIG_KEYS, $GL_PACKAGE_HOOKS, $GL_SETPERMS_OVERRIDES_CONFIG, $GL_BIG_CONFIG); +our ($GL_ADMINDIR, $GL_CONF, $GL_KEYDIR, $GL_CONF_COMPILED, $REPO_BASE, $REPO_UMASK, $PROJECTS_LIST, $GIT_PATH, $GL_WILDREPOS, $GL_GITCONFIG_KEYS, $GL_PACKAGE_HOOKS, $GL_SETPERMS_OVERRIDES_CONFIG, $GL_BIG_CONFIG, $GL_NO_DAEMON_NO_GITWEB); # and these are set by gitolite.pm our ($REPONAME_PATT, $REPOPATT_PATT, $USERNAME_PATT, $AUTH_COMMAND, $AUTH_OPTIONS, $ABRT, $WARN); @@ -470,58 +470,60 @@ for my $repo (keys %repo_config) { wrap_chdir("$repo_base_abs"); -# daemons first... -for my $repo (sort keys %repos) { - next unless $repo =~ $REPONAME_PATT; - next if $repo =~ m(^\@|EXTCMD/); # these are not real repos - my $export_ok = "$repo.git/git-daemon-export-ok"; - if ($repos{$repo}{'R'}{'daemon'}) { - system("touch $export_ok"); - } else { - unlink($export_ok); - } -} - -my %projlist = (); -# ...then gitwebs -for my $repo (sort keys %repos) { - next unless $repo =~ $REPONAME_PATT; - next if $repo =~ m(^\@|EXTCMD/); # these are not real repos - my $desc_file = "$repo.git/description"; - # note: having a description also counts as enabling gitweb - if ($repos{$repo}{'R'}{'gitweb'} or $desc{"$repo.git"}) { - $projlist{"$repo.git"} = 1; - # add the description file; no messages to user or error checking :) - $desc{"$repo.git"} and open(DESC, ">", $desc_file) and print DESC $desc{"$repo.git"} . 
"\n" and close DESC; - if ($owner{"$repo.git"}) { - # set the repository owner - system("git", "--git-dir=$repo.git", "config", "gitweb.owner", $owner{"$repo.git"}); +unless ($GL_NO_DAEMON_NO_GITWEB) { + # daemons first... + for my $repo (sort keys %repos) { + next unless $repo =~ $REPONAME_PATT; + next if $repo =~ m(^\@|EXTCMD/); # these are not real repos + my $export_ok = "$repo.git/git-daemon-export-ok"; + if ($repos{$repo}{'R'}{'daemon'}) { + system("touch $export_ok"); } else { + unlink($export_ok); + } + } + + my %projlist = (); + # ...then gitwebs + for my $repo (sort keys %repos) { + next unless $repo =~ $REPONAME_PATT; + next if $repo =~ m(^\@|EXTCMD/); # these are not real repos + my $desc_file = "$repo.git/description"; + # note: having a description also counts as enabling gitweb + if ($repos{$repo}{'R'}{'gitweb'} or $desc{"$repo.git"}) { + $projlist{"$repo.git"} = 1; + # add the description file; no messages to user or error checking :) + $desc{"$repo.git"} and open(DESC, ">", $desc_file) and print DESC $desc{"$repo.git"} . "\n" and close DESC; + if ($owner{"$repo.git"}) { + # set the repository owner + system("git", "--git-dir=$repo.git", "config", "gitweb.owner", $owner{"$repo.git"}); + } else { + # remove the repository owner setting + system("git --git-dir=$repo.git config --unset-all gitweb.owner 2>/dev/null"); + } + } else { + # delete the description file; no messages to user or error checking :) + unlink $desc_file; # remove the repository owner setting system("git --git-dir=$repo.git config --unset-all gitweb.owner 2>/dev/null"); } - } else { - # delete the description file; no messages to user or error checking :) - unlink $desc_file; - # remove the repository owner setting - system("git --git-dir=$repo.git config --unset-all gitweb.owner 2>/dev/null"); + + # unless there are other gitweb.* keys set, remove the section to keep the + # config file clean + my $keys = `git --git-dir=$repo.git config --get-regexp '^gitweb\\.' 
2>/dev/null`; + if (length($keys) == 0) { + system("git --git-dir=$repo.git config --remove-section gitweb 2>/dev/null"); + } } - # unless there are other gitweb.* keys set, remove the section to keep the - # config file clean - my $keys = `git --git-dir=$repo.git config --get-regexp '^gitweb\\.' 2>/dev/null`; - if (length($keys) == 0) { - system("git --git-dir=$repo.git config --remove-section gitweb 2>/dev/null"); + # update the project list + my $projlist_fh = wrap_open( ">", $PROJECTS_LIST); + for my $proj (sort keys %projlist) { + print $projlist_fh "$proj\n"; } + close $projlist_fh; } -# update the project list -my $projlist_fh = wrap_open( ">", $PROJECTS_LIST); -for my $proj (sort keys %projlist) { - print $projlist_fh "$proj\n"; -} -close $projlist_fh; - # ---------------------------------------------------------------------------- # "compile" ssh authorized_keys # ---------------------------------------------------------------------------- From aa8da9301674874fe7f958c0d49d773e8333b5e3 Mon Sep 17 00:00:00 2001 From: Sitaram Chamarty Date: Sun, 16 May 2010 13:13:25 +0530 Subject: [PATCH 8/8] tone down the "ZOMG users without pubkeys" hysteria :) --- src/gl-compile-conf | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/gl-compile-conf b/src/gl-compile-conf index e6d1268..d6d6dbf 100755 --- a/src/gl-compile-conf +++ b/src/gl-compile-conf @@ -580,10 +580,16 @@ for my $pubkey (`find . -type f`) print $newkeys_fh $pubkey_content; } # lint check 3; a little more severe than the first two I guess... -for my $user (sort keys %user_list) { - next if $user =~ /^(gitweb|daemon|\@.*|~\$creator|\$readers|\$writers)$/ or $user_list{$user} eq 'has pubkey'; - print STDERR "$WARN user $user in config, but has no pubkey!\n"; + my @no_pubkey = + grep { $_ !~ /^(gitweb|daemon|\@.*|~\$creator|\$readers|\$writers)$/ } + grep { $user_list{$_} ne 'has pubkey' } + keys %user_list; + if (@no_pubkey > 10) { + print STDERR "$WARN You have " . 
scalar(@no_pubkey) . " users WITHOUT pubkeys...!\n"; + } elsif (@no_pubkey) { + print STDERR "$WARN the following users have no pubkeys:\n", join(",", sort @no_pubkey), "\n"; + } } print $newkeys_fh "# gitolite end\n";