gitolite/src/gl-compile-conf
Sitaram Chamarty 91a8379f9f change some 'warn's to 'print STDERR' and vice versa
with warns now being logged, it's nice to make sure that anything that
could even vaguely be considered someone playing with the system, *or*
is otherwise noteworthy, be emitted as a 'warn' instead of as a 'print
STDERR'.  Similarly stuff that is clearly a syntactic warning or typo
should come from 'print STDERR', instead of from a 'warn'.
2011-03-08 14:20:35 +05:30

566 lines
23 KiB
Perl
Executable file

#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
$Data::Dumper::Indent = 1;
$Data::Dumper::Sortkeys = 1;
# ----------------------------------------------------------------------------
# find the rc file, then pull the libraries
# ----------------------------------------------------------------------------
BEGIN {
die "ENV GL_RC not set\n" unless $ENV{GL_RC};
die "ENV GL_BINDIR not set\n" unless $ENV{GL_BINDIR};
}
use lib $ENV{GL_BINDIR};
use gitolite_rc;
use gitolite qw(:DEFAULT %repos %groups %git_configs %split_conf);
# === add-auth-keys ===
# setup quiet mode if asked; please do not use this when running manually
open STDOUT, ">", "/dev/null" if (@ARGV and shift eq '-q');
# ----------------------------------------------------------------------------
# definitions specific to this program
# ----------------------------------------------------------------------------
# groups can now represent user groups or repo groups.
# $groups{group}{member} = "master" (or name of fragment file in which the
# group is defined).
# our %groups = (); # moved to gitolite.pm now
# %repos has two functions.
# $repos{repo}{R|W}{user} = 1 if user has R (or W) permissions for at least
# one branch in repo. This is used by the "level 1 check" (see faq). There's
# also the new "C" (create a repo) permission now
# $repos{repo}{user} is a list of {ref, perms} pairs. This is used by the
# level 2 check. In order to allow "exclude" rules, the order of rules now
# matters, so what used to be entirely "hash of hash of hash" now has a list
# in between :)
# copy above desc to lite.pm -- my %repos = ();
# names of repos whose ACLs don't make it into the main compiled config file
# copy above desc to lite.pm -- my %split_conf = ();
# rule sequence number
my $rule_seq = 0;
# <sigh>... having been forced to use a list as described above, we lose some
# efficiency due to the possibility of the same {ref, perms} pair showing up
# multiple times for the same repo+user. So...
my %rurp_seen = ();
# catch usernames<->pubkeys mismatches; search for "lint" below
my %user_list = ();
# repo specific 'git config' stuff
# our %git_configs = (); # moved to gitolite.pm now
# gitweb descriptions and owners; plain text, keyed by "$repo.git"
my %desc = ();
my %owner = ();
# ----------------------------------------------------------------------------
# subroutines
# ----------------------------------------------------------------------------
sub expand_list
{
my @list = @_;
my @new_list = ();
for my $item (@list)
{
if ($item =~ /^@/ and $item ne '@all') # nested group
{
die "$ABRT undefined group $item\n" unless $groups{$item};
# add those names to the list
push @new_list, sort keys %{ $groups{$item} };
}
else
{
push @new_list, $item;
}
}
return @new_list;
}
# ----------------------------------------------------------------------------
# "compile" GL conf
# ----------------------------------------------------------------------------
sub check_fragment_repo_disallowed
{
# trying to set access for $repo (='foo')...
my ($fragment, $repo) = @_;
# processing the master config, not a fragment
return 0 if $fragment eq 'master';
# fragment is also called 'foo' (you're allowed to have a
# fragment that is only concerned with one repo)
return 0 if $fragment eq $repo;
# same thing in big-config-land; foo is just @foo now
return 0 if $GL_BIG_CONFIG and ("\@$fragment" eq $repo);
my @matched = grep { $repo =~ /^$_$/ }
grep { $groups{"\@$fragment"}{$_} eq 'master' }
sort keys %{ $groups{"\@$fragment"} };
return 0 if @matched > 0;
return 1;
}
sub parse_conf_line
{
my ($line, $fragment, $repos_p, $ignored_p) = @_;
# user or repo groups
if ($line =~ /^(@\S+) = ?(.*)/)
{
die "$ABRT defining groups is not allowed inside fragments\n"
if $GL_BIG_CONFIG and $fragment ne 'master';
# store the members of each group as hash key. Keep track of when
# the group was *first* created by using $fragment as the *value*
do { $groups{$1}{$_} ||= $fragment } for ( expand_list( split(' ', $2) ) );
# create the group hash even if empty
$groups{$1} = {} unless $groups{$1};
die "$ABRT bad group $1\n" unless $1 =~ $REPONAME_PATT;
}
# repo(s)
elsif ($line =~ /^repo (.*)/)
{
# grab the list...
@{ $repos_p } = split ' ', $1;
# ...expand groups in the default case
@{ $repos_p } = expand_list ( @{ $repos_p } ) unless $GL_BIG_CONFIG;
# ...sanity check
for (@{ $repos_p }) {
print STDERR "$WARN explicit '.git' extension ignored for $_.git\n" if s/\.git$//;
die "$ABRT bad reponame $_\n"
if ($GL_WILDREPOS and $_ !~ $REPOPATT_PATT);
die "$ABRT bad reponame $_ or you forgot to set \$GL_WILDREPOS\n"
if (not $GL_WILDREPOS and $_ !~ $REPONAME_PATT);
}
s/\bCREAT[EO]R\b/\$creator/g for @{ $repos_p };
}
# actual permission line
elsif ($line =~ /^(-|C|R|RW\+?(?:C?D?|D?C?)) (.* )?= (.+)/)
{
my $perms = $1;
my @refs; @refs = split( ' ', $2 ) if $2;
@refs = expand_list ( @refs );
my @users = split ' ', $3;
die "$ABRT \$GL_WILDREPOS is not set, you cant use 'C' in config\n" if $perms eq 'C' and not $GL_WILDREPOS;
# if no ref is given, this PERM applies to all refs
@refs = qw(refs/.*) unless @refs;
# deprecation warning
map { print STDERR "WARNING: old syntax 'PATH/' found; please use new syntax 'NAME/'\n" if s(^PATH/)(NAME/) } @refs;
# fully qualify refs that dont start with "refs/" or "NAME/";
# prefix them with "refs/heads/"
@refs = map { m(^(refs|NAME)/) or s(^)(refs/heads/); $_ } @refs;
@refs = map { s(/USER/)(/\$gl_user/); $_ } @refs;
# expand the user list, unless it is just "@all"
@users = expand_list ( @users ) unless $GL_BIG_CONFIG;
do { die "$ABRT bad username $_\n" unless $_ =~ $USERNAME_PATT } for @users;
s/\bCREAT[EO]R\b/~\$creator/g for @users;
# ok, we can finally populate the %repos hash
for my $repo (@{ $repos_p }) # each repo in the current stanza
{
# if we're processing a delegated config file (not the master
# config), we need to prevent attempts by that admin to obtain
# rights on stuff outside his domain
# trying to set access for $repo (='foo')...
if (check_fragment_repo_disallowed( $fragment, $repo ))
{
$ignored_p->{$fragment}{$repo} = 1;
next;
}
for my $user (@users)
{
# lint check, to catch pubkey/username typos
if ($user =~ /^@/ and $user ne '@all') {
# this is a usergroup, not a normal user; happens with GL_BIG_CONFIG
if (exists $groups{$user}) {
$user_list{$_}++ for keys %{ $groups{$user} };
}
} else {
$user_list{$user}++;
}
# for 1st level check (see faq/tips doc)
$repos{$repo}{C}{$user} = 1, next if $perms eq 'C';
$repos{$repo}{R}{$user} = 1 if $perms =~ /R/;
$repos{$repo}{W}{$user} = 1 if $perms =~ /W|D/;
# if the user specified even a single 'D' anywhere, make
# that fact easy to find; this changes the meaning of RW+
# to no longer permit deletes (see update hook)
$repos{$repo}{DELETE_IS_D} = 1 if $perms =~ /D/;
$repos{$repo}{CREATE_IS_C} = 1 if $perms =~ /RW.*C/;
# for 2nd level check, store each "ref, perms" pair in order
for my $ref (@refs)
{
# checking NAME based restrictions is expensive for
# the update hook (see the changes to src/hooks/update
# in this commit for why) so we would *very* much like
# to avoid doing it for the large majority of repos
# that do *not* use NAME limits. Setting a flag that
# can be checked right away will help us do that
$repos{$repo}{NAME_LIMITS} = 1 if $ref =~ /^NAME\//;
my $p_user = $user; $p_user =~ s/creator$/creator - wild/;
push @{ $repos{$repo}{$p_user} }, [ $rule_seq++, $ref, $perms ]
unless $rurp_seen{$repo}{$p_user}{$ref}{$perms}++;
}
}
}
}
# repo specific 'git config' stuff
elsif ($line =~ /^config (.+) = ?(.*)/)
{
my ($key, $value) = ($1, $2);
my @validkeys = split(' ', ($GL_GITCONFIG_KEYS || '') );
my @matched = grep { $key =~ /^$_$/ } @validkeys;
die "$ABRT git config $key not allowed\ncheck GL_GITCONFIG_KEYS in the rc file for how to allow it\n" if (@matched < 1);
for my $repo (@{ $repos_p }) # each repo in the current stanza
{
$git_configs{$repo}{$key} = $value;
# no problem if it's a plain repo (non-pattern, non-groupname)
# OR wild configs are allowed
unless ( ($repo =~ $REPONAME_PATT and $repo !~ /^@/) or $GL_GITCONFIG_WILD) {
my @r = ($repo); # single wildpatt
@r = sort keys %{ $groups{$repo} } if $groups{$repo}; # or a group; get its members
do {
warn "$WARN git config set for $_ but \$GL_GITCONFIG_WILD not set\n" unless $_ =~ $REPONAME_PATT
} for @r;
}
}
}
# include
elsif ($line =~ /^include "(.+)"/)
{
my $file = $1;
$file = "$GL_ADMINDIR/conf/$file" unless $file =~ /^\//;
die "$ABRT $fragment attempting to include configuration\n" if $fragment ne 'master';
die "$ABRT included file not found: '$file'\n" unless -f $file;
parse_conf_file( $file, $fragment );
}
# very simple syntax for the gitweb description of repo; one of:
# reponame = "some description string"
# reponame "owner name" = "some description string"
elsif ($line =~ /^(\S+)(?: "(.*?)")? = "(.*)"$/)
{
my ($repo, $owner, $desc) = ($1, $2, $3);
die "$ABRT bad repo name $repo\n" unless $repo =~ $REPONAME_PATT;
die "$ABRT $fragment attempting to set description for $repo\n" if check_fragment_repo_disallowed( $fragment, $repo );
$desc{"$repo.git"} = $desc;
$owner{"$repo.git"} = $owner || '';
}
else
{
die "$ABRT can't make head or tail of '$line'\n";
}
}
sub cleanup_conf_line
{
my ($line) = @_;
# kill comments, but take care of "#" inside *simple* strings
$line =~ s/^((".*?"|[^#"])*)#.*/$1/;
# normalise whitespace; keeps later regexes very simple
$line =~ s/=/ = /;
$line =~ s/\s+/ /g;
$line =~ s/^ //;
$line =~ s/ $//;
return $line;
}
sub parse_conf_file
{
my ($conffile, $fragment) = @_;
# the second arg, $fragment, is passed in as "master" when parsing the
# main config, and the fragment name when parsing a fragment. In the
# latter case, the parser uses that information to ignore (and warn about)
# any repos in the fragment that are not members of the "repo group" of
# the same name.
my %ignored = ();
my $conf_fh = wrap_open( "<", $conffile );
# the syntax is fairly simple, so we parse it inline
my @repos;
my $line;
while (<$conf_fh>)
{
$line = cleanup_conf_line($_);
# skip blank lines
next unless $line =~ /\S/;
parse_conf_line( $line, $fragment, \@repos, \%ignored );
}
for my $ig (sort keys %ignored)
{
warn "\n\t\t***** WARNING *****\n" .
"\t$ig.conf attempting to set access for " .
join (", ", sort keys %{ $ignored{$ig} }) . "\n";
}
}
# parse the main config file
parse_conf_file($GL_CONF, 'master');
# parse any delegated fragments
wrap_chdir($GL_ADMINDIR);
for my $fragment_file (glob("conf/fragments/*.conf"))
{
# we already check (elsewhere) that a fragment called "foo" will not try
# to specify access control for a repo whose name is not "foo" or is not
# part of a group called "foo" created by master
# meanwhile, I found a possible attack where the admin for group B creates
# a "convenience" group of (a subset of) his users, and then the admin for
# repo group A (alphabetically before B) adds himself to that same group
# in his own fragment.
# as a result, admin_A now has access to group B repos :(
# so now we lock the groups hash to the value it had after parsing
# "master", and localise any changes to it by this fragment so that they
# don't propagate to the next fragment. Thus, each fragment now has only
# those groups that are defined in "master" and itself
local %groups = %groups;
my $fragment = $fragment_file;
$fragment =~ s/^conf\/fragments\/(.*).conf$/$1/;
parse_conf_file($fragment_file, $fragment);
}
# ----------------------------------------------------------------------------
# (that ends the config file compiler, though we postpone the writing
# for now to deal with the latest GL_BIG_CONFIG innovation!)
# ----------------------------------------------------------------------------
# ----------------------------------------------------------------------------
# what's the git version?
# ----------------------------------------------------------------------------
# we don't like stuff older than 1.6.2
my $git_version = `git --version`;
die "
*** ERROR ***
did not get a proper version number. Please see if git is in the PATH on
the server. If it is not, please edit ~/.gitolite.rc on the server and
set the \$GIT_PATH variable to the correct value\n
" unless $git_version;
my ($gv_maj, $gv_min, $gv_patchrel) = ($git_version =~ m/git version (\d+)\.(\d+)\.(\d+)/);
die "$ABRT I can't understand $git_version\n" unless ($gv_maj >= 1);
$git_version = $gv_maj*10000 + $gv_min*100 + $gv_patchrel; # now it's "normalised"
die "\n\t\t***** AAARGH! *****\n" .
"\tyour git version is older than 1.6.2\n" .
"\tsince that is now more than one year old, and gitolite needs some of\n" .
"\tthe newer features, please upgrade.\n"
if $git_version < 10602; # that's 1.6.2 to you
# ----------------------------------------------------------------------------
# most of the rest of this program can be "switched off"; see
# doc/big-config.mkd for details.
# ----------------------------------------------------------------------------
# repo-base needs to be an absolute path due to all the jumping around we do,
# so if it was not already absolute, prefix $HOME.
$ENV{GL_REPO_BASE_ABS} = ( $REPO_BASE =~ m(^/) ? $REPO_BASE : "$ENV{HOME}/$REPO_BASE" );
# process the normal repos in %repos. This includes creating them if needed
# (and GL_NO_CREATE_REPOS is not set), checking hooks, and finally, if
# GL_BIG_CONFIG is set, writing out the one-repo config file for directly
# specified repos (i.e., "repo foo", not "@grp = foo" + "repo @grp")
do_normal_repos();
write_compiled_conf(); # write out the final compiled config
# ----------------------------------------------------------------------------
# process the normal repos in %repos (create, hook, one_repo config...)
# ----------------------------------------------------------------------------
sub do_normal_repos
{
wrap_chdir("$ENV{GL_REPO_BASE_ABS}");
# start with the ones that are normal repos in %repos
my @repos = grep { $_ =~ $REPONAME_PATT and not /^@/ } sort keys %repos;
# then, for each repogroup, find the members of the group and add them in
map { push @repos, keys %{ $groups{$_} } } grep { /^@/ } keys %repos;
# weed out duplicates (the code in the loop below is disk activity!)
my %seen = map { $_ => 1 } @repos;
@repos = sort keys %seen;
for my $repo (sort @repos) {
next unless $repo =~ $REPONAME_PATT; # skip repo patterns
next if $repo =~ m(^\@|EXTCMD/); # skip groups and fake repos
unless ($GL_NO_CREATE_REPOS) {
unless (-d "$repo.git") {
print STDERR "creating $repo...\n";
new_repo($repo, "$GL_ADMINDIR/hooks/common");
# new_repo would have chdir'd us away; come back
wrap_chdir("$ENV{GL_REPO_BASE_ABS}");
}
# when repos are copied over from elsewhere, one had to run easy install
# once again to make the new (OS-copied) repo contain the proper update
# hook. Perhaps we can make this easier now, and eliminate the easy
# install, with a quick check (and a new, empty, "hook" as a sentinel)
unless (-l "$repo.git/hooks/gitolite-hooked") {
ln_sf("$GL_ADMINDIR/hooks/common", "*", "$repo.git/hooks");
# in case of package install, GL_ADMINDIR is no longer the top cop;
# override with the package hooks
ln_sf("$GL_PACKAGE_HOOKS/common", "*", "$repo.git/hooks") if $GL_PACKAGE_HOOKS;
}
}
# write a one_repo config for normal repos declared directly (not just via a group)
write_1_compiled_conf($repo) if $GL_BIG_CONFIG and $repos{$repo} and -d "$repo.git";
}
}
sub write_1_compiled_conf
{
# warning: writes and *deletes* it from %repos and %git_configs
my ($repo) = shift;
my (%one_repo, %one_git_config);
open(my $compiled_fh, ">", "$repo.git/gl-conf") or return;
$one_repo{$repo} = $repos{$repo};
delete $repos{$repo};
my $dumped_data = Data::Dumper->Dump([\%one_repo], [qw(*one_repo)]);
if ($git_configs{$repo}) {
$one_git_config{$repo} = $git_configs{$repo};
delete $git_configs{$repo};
$dumped_data .= Data::Dumper->Dump([\%one_git_config], [qw(*one_git_config)]);
}
# the dump uses single quotes, but we convert any strings containing $creator
# and $gl_user to double quoted strings. A bit sneaky, but not too much...
$dumped_data =~ s/'(?=[^']*\$(?:creator|gl_user))~?(.*?)'/"$1"/g;
print $compiled_fh $dumped_data;
close $compiled_fh;
$split_conf{$repo} = 1;
}
sub write_compiled_conf
{
my $compiled_fh = wrap_open( ">", "$GL_CONF_COMPILED.new" );
my $data_version = $current_data_version;
print $compiled_fh Data::Dumper->Dump([$data_version], [qw(*data_version)]);
my $dumped_data = Data::Dumper->Dump([\%repos], [qw(*repos)]);
$dumped_data .= Data::Dumper->Dump([\%git_configs], [qw(*git_configs)]) if %git_configs;
# the dump uses single quotes, but we convert any strings containing $creator
# and $gl_user to double quoted strings. A bit sneaky, but not too much...
$dumped_data =~ s/'(?=[^']*\$(?:creator|gl_user))~?(.*?)'/"$1"/g;
print $compiled_fh $dumped_data;
if (%groups) {
$dumped_data = Data::Dumper->Dump([\%groups], [qw(*groups)]);
$dumped_data =~ s/\bCREAT[EO]R\b/\$creator/g;
$dumped_data =~ s/'(?=[^']*\$(?:creator|gl_user))~?(.*?)'/"$1"/g;
print $compiled_fh $dumped_data;
}
print $compiled_fh Data::Dumper->Dump([\%split_conf], [qw(*split_conf)]) if %split_conf;
close $compiled_fh or die "$ABRT close compiled-conf failed: $!\n";
rename "$GL_CONF_COMPILED.new", "$GL_CONF_COMPILED";
}
# ----------------------------------------------------------------------------
# get a list of physical repos for later
# ----------------------------------------------------------------------------
my @phy_repos = ();
@phy_repos = list_phy_repos() unless $GL_NO_DAEMON_NO_GITWEB;
# NOTE: we're overloading GL_NO_DAEMON_NO_GITWEB to mean "no git config" also.
# In fact anything that requires trawling through the existing repos doing
# stuff to all of them is skipped if this variable is set. This is primarily
# for the Fedora folks, but it should be useful for anyone who has a huge set
# of repos and wants to manage gitweb/daemon/etc access via other means (they
# typically have the whole thing controlled by a web-app and a database
# anyway, and gitolite is only doing the access control and nothing more).
# ----------------------------------------------------------------------------
# various updates to all real repos
# ----------------------------------------------------------------------------
# ----------------------------------------------------------------------------
# update repo configurations, gitweb description, daemon export-ok, etc
# ----------------------------------------------------------------------------
my %projlist = ();
# for each real repo (and remember this will be empty, thus skipping all this,
# if $GL_NO_DAEMON_NO_GITWEB is on!)
# note: we do them in 2 separate loops to avoid breaking the optimisation in
# sub parse_acl (look for variable $saved_crwu)
for my $repo (@phy_repos) {
wrap_chdir("$ENV{GL_REPO_BASE_ABS}/$repo.git");
# daemon is easy
setup_daemon_access($repo);
}
for my $repo (@phy_repos) {
wrap_chdir("$ENV{GL_REPO_BASE_ABS}/$repo.git");
# gitweb is a little more complicated. Here're some notes:
# - "setup_gitweb_access" also sets "owner", despite the name
# - specifying a description also counts as enabling gitweb
# - description and owner are not specified for wildrepos; they're
# specified for *actual* repos, even if the repo was created by a
# wild card spec and "C" permissions. If you see the docs for the
# gitolite.conf file, you will see that repo owner/desc don't go
# into the "repo foo" section; they're essentialy independent.
# Anyway, I believe it doesn't make sense to have all wild repos
# (for some pattern) to have the same description and owner.
$projlist{"$repo.git"} = 1 if setup_gitweb_access($repo, $desc{"$repo.git"} || '', $owner{"$repo.git"} || '');
# git config
# implementation note: this must happen *after* one of the previous 2
# calls (setup daemon or gitweb). The reason is that they call
# "can_read", which eventually calls parse_acl with the right "creator"
# set for the *current* repo, which in turn stores translated values for
# $creator in the git_configs hash, which, (phew!) is needed for a match
# that eventually gets you a valid $git_configs{} below
setup_git_configs($repo, \%git_configs) if $git_configs{$repo};
}
# write out the project list
my $projlist_fh = wrap_open( ">", $PROJECTS_LIST);
for my $proj (sort keys %projlist) {
print $projlist_fh "$proj\n";
}
close $projlist_fh;
# ----------------------------------------------------------------------------
# "compile" ssh authorized_keys
# ----------------------------------------------------------------------------
unless ($GL_NO_SETUP_AUTHKEYS) {
setup_authkeys($GL_KEYDIR, \%user_list);
}