From 183952013476e296be00074715d3b3b1539b87cf Mon Sep 17 00:00:00 2001 From: Sitaram Chamarty Date: Sun, 26 Feb 2012 09:04:41 +0530 Subject: [PATCH] 3 new VREFs plus doc - 'dupkeys' -- catch duplicate keys in keydir - 'email-check' -- "you can only push your own commits" plus, 'merge-check' -- how we could have done the no-merges policy --- contrib/VREF/gl-VREF-DUPKEYS | 45 +++++++++++++ contrib/VREF/gl-VREF-EMAIL_CHECK | 66 +++++++++++++++++++ contrib/VREF/gl-VREF-MERGE_CHECK | 49 ++++++++++++++ contrib/update.detect-dup-pubkeys | 74 --------------------- contrib/update.email-check | 69 -------------------- doc/virtual-refs.mkd | 98 ++++++++++++++++++++++------ hooks/common/update.secondary.sample | 44 ------------- 7 files changed, 239 insertions(+), 206 deletions(-) create mode 100755 contrib/VREF/gl-VREF-DUPKEYS create mode 100755 contrib/VREF/gl-VREF-EMAIL_CHECK create mode 100644 contrib/VREF/gl-VREF-MERGE_CHECK delete mode 100755 contrib/update.detect-dup-pubkeys delete mode 100755 contrib/update.email-check delete mode 100644 hooks/common/update.secondary.sample diff --git a/contrib/VREF/gl-VREF-DUPKEYS b/contrib/VREF/gl-VREF-DUPKEYS new file mode 100755 index 0000000..c7b7cf4 --- /dev/null +++ b/contrib/VREF/gl-VREF-DUPKEYS @@ -0,0 +1,45 @@ +#!/bin/bash + +# gitolite VREF to detect duplicate public keys + +# see gitolite doc/virtual-refs.mkd for what the arguments are +sha=$3 + +# git sets this; and we don't want it at this point... +unset GIT_DIR + +# paranoia +set -e + +# setup the temp area +export TMPDIR=$GL_REPO_BASE_ABS +export tmp=$(mktemp -d -t gl-internal-temp-repo.XXXXXXXXXX); +trap "rm -rf $tmp" EXIT; + +git archive $sha keydir | tar -C $tmp -xf - + # DO NOT try, say, 'GIT_WORK_TREE=$tmp git checkout $sha'. It'll screw up + # both the 'index' and 'HEAD' of the repo.git. Screwing up the index is + # BAD because now it goes out of sync with $GL_ADMINDIR. Think of a push + # that had a deleted pubkey but failed a hooklet for some reason. A + # subsequent push that fixes the error will now result in a $GL_ADMINDIR + # that still *has* that deleted pubkey!! + + # And this is equally applicable to cases where you're using a + # post-receive or similar hook to live update a web site or something, + # which is a pretty common usage, I am given to understand. + +cd $tmp + +for f in `find keydir -name "*.pub"` +do + ssh-keygen -l -f "$f" +done | perl -ane ' + die "FATAL: $F[2] is a duplicate of $seen{$F[1]}\n" if $seen{$F[1]}; + $seen{$F[1]} = $F[2]; +' + +# as you can see, a vref can also 'die' if it wishes to, and it'll take the +# whole update with it if it does. No messing around with sending back a +# vref, having it run through the matches, and printing the DENIED message, +# etc. However, if your push is running from a script, and that script is +# looking for the word "DENIED" or something, then this won't work... diff --git a/contrib/VREF/gl-VREF-EMAIL_CHECK b/contrib/VREF/gl-VREF-EMAIL_CHECK new file mode 100755 index 0000000..34c66f5 --- /dev/null +++ b/contrib/VREF/gl-VREF-EMAIL_CHECK @@ -0,0 +1,66 @@ +#!/usr/bin/perl + +# gitolite VREF to check if all *new* commits have author == pusher + +# THIS IS NOT READY TO USE AS IS +# ------------------------------ +# you MUST change the 'email_ok()' sub to suit *YOUR* site's +# gitolite username -> author email mapping! + +# See bottom of the program for important philosophical notes. + +use strict; +use warnings; + +# mapping between gitolite userid and correct email address is encapsulated in +# this subroutine; change as you like +sub email_ok { + my ($author_email) = shift; + my $expected_email = "$ENV{GL_USER}\@atc.tcs.com"; + return $author_email eq $expected_email; +} + +my ( $ref, $old, $new ) = @ARGV; +for my $rev (`git log --format="%ae\t%h\t%s" $new --not --all`) { + chomp($rev); + my ( $author_email, $hash, $subject ) = split /\t/, $rev; + + # again, we use the trick that a vref can just choose to die instead of + # passing back a vref, having it checked, etc., if it's more convenient + die "$ENV{GL_USER}, you can't push $hash authored by $author_email\n" . "\t(subject of commit was $subject)\n" + unless email_ok($author_email); +} + +exit 0; + +__END__ + +The following discussion is for people who want to enforce this check on ALL +their developers (i.e., not just the newbies). + +Doing this breaks the "D" in "DVCS", forcing all your developers to work to a +centralised model as far as pushes are concerned. It prevents amending +someone else's commit and pushing (this includes rebasing, cherry-picking, and +so on, which are all impossible now). It also makes *any* off-line +collabaration between two developers useless, because neither of them can push +the result to the server. + +PHBs should note that validating the committer ID is NOT the same as reviewing +the code and running QA/tests on it. If you're not reviewing/QA-ing the code, +it's probably worthless anyway. Conversely, if you *are* going to review the +code and run QA/tests anyway, then you don't really need to validate the +author email! + +In a DVCS, if you *pushed* a series of commits, you have -- in some sense -- +signed off on them. The most formal way to "sign" a series is to tack on and +push a gpg-signed tag, although most people don't go that far. Gitolite's log +files are designed to preserve that accountability to *some* extent, though; +see contrib/adc/who-pushed for an admin defined command that quickly and +easily tells you who *pushed* a particular commit. + +Anyway, the point is that the only purpose of this script is to + + * pander to someone who still has not grokked *D*VCS + OR + * tick off an item in some stupid PHB's checklist + diff --git a/contrib/VREF/gl-VREF-MERGE_CHECK b/contrib/VREF/gl-VREF-MERGE_CHECK new file mode 100644 index 0000000..0542a87 --- /dev/null +++ b/contrib/VREF/gl-VREF-MERGE_CHECK @@ -0,0 +1,49 @@ +#!/usr/bin/perl +use strict; +use warnings; + +# gitolite VREF to check if there are any merge commits in the current push. + +# THIS IS DEMO CODE; please read all comments below as well as +# doc/virtual-refs.mkd before trying to use this. + +# usage in conf/gitolite.conf goes like this: + +# - VREF/MERGE_CHECK/master = @all +# # reject only if the merge commit is being pushed to the master branch +# - VREF/MERGE_CHECK = @all +# # reject merge commits to any branch + +my $ref = $ARGV[0]; +my $oldsha = $ARGV[1]; +my $newsha = $ARGV[2]; +my $refex = $ARGV[6]; + +# The following code duplicates some code from parse_conf_line() and some from +# check_ref(). This duplication is the only thing that is preventing me from +# removing the "M" permission code from 'core' gitolite and using this +# instead. However, it does demonstrate how you would do this if you had to +# create any other similar features, for example someone wanted "no non-merge +# first-parent", which is far too specific for me to add to 'core'. + +# -- begin duplication -- +my $branch_refex = $ARGV[7] || ''; +if ($branch_refex) { + $branch_refex =~ m(^refs/) or $branch_refex =~ s(^)(refs/heads/); +} else { + $branch_refex = 'refs/.*'; +} +exit 0 unless $ref =~ /^$branch_refex/; +# -- end duplication -- + +# we can't run this check for tag creation or new branch creation, because +# 'git log' does not deal well with $oldsha = '0' x 40. +if ( $oldsha eq "0" x 40 or $newsha eq "0" x 40 ) { + print STDERR "ref create/delete ignored for purposes of merge-check\n"; + exit 0; +} + +my $ret = `git rev-list -n 1 --merges $oldsha..$newsha`; +print "$refex FATAL: merge commits not allowed\n" if $ret =~ /./; + +exit 0; diff --git a/contrib/update.detect-dup-pubkeys b/contrib/update.detect-dup-pubkeys deleted file mode 100755 index a32db7e..0000000 --- a/contrib/update.detect-dup-pubkeys +++ /dev/null @@ -1,74 +0,0 @@ -#!/bin/bash - -# update "hooklet" to detect duplicate public keys - -# This particular hooklet also serves as an example for people writing others. -# [It should be quite easy to figure out what parts apply to any hooklet and -# what parts are specific to *this* hooklet and its function.] - -# see hooks/common/update.secondary.sample for instructions on *enabling* -# hooklets - -# a hooklet is called as follows: -# git-receive-pack --> 'update' --> 'update.secondary' --> this script -# note: the same three arguments that git passes to the update hook are passed -# along to each hooklet. - -# the update hook, and therefore the hooklets, are called for *every* repo out -# there. If you want this hooklet to run only for certain repos, here's how: -[ "$GL_REPO" = "gitolite-admin" ] || exit 0 - -# superfluous, since update.secondary already did it, but I'd like to -# emphasise that all output MUST go to STDERR -exec >&2 - -# ---- - -# the main functionality of the hooklet starts here. In this one (and I -# suspect many others) we want to examine the actual files from the commit -# that was pushed. - -# get the tip commit being pushed -sha=$3 - -# git sets this; and we don't want it at this point... -unset GIT_DIR - -# paranoia -set -e - -# setup the temp area -export TMPDIR=$GL_REPO_BASE_ABS -export tmp=$(mktemp -d -t gl-internal-temp-repo.XXXXXXXXXX); -trap "rm -rf $tmp" EXIT; - -# now get the files into $tmp. - # (note: if your task does not require the actual files, and you can - # manage with "git cat-file -s" and so on, then you may not even need a - # $tmp; you may be able to do it all right in the repo.git directory) - -git archive $sha keydir | tar -C $tmp -xf - - # DO NOT try, say, 'GIT_WORK_TREE=$tmp git checkout $sha'. It'll screw up - # both the 'index' and 'HEAD' of the repo.git. Screwing up the index is - # BAD because now it goes out of sync with $GL_ADMINDIR. Think of a push - # that had a deleted pubkey but failed a hooklet for some reason. A - # subsequent push that fixes the error will now result in a $GL_ADMINDIR - # that still *has* that deleted pubkey!! - - # And this is equally applicable to cases where you're using a - # post-receive or similar hook to live update a web site or something, - # which is a pretty common usage, I am given to understand. - -cd $tmp - -# ---- - -# *finally*, the actual check you need to do in this hook: look for duplicate -# pubkeys and exit 1 if dups are found -for f in `find keydir -name "*.pub"` -do - ssh-keygen -l -f "$f" -done | perl -ane ' - die "$F[2] is a duplicate of $seen{$F[1]}\n" if $seen{$F[1]}; - $seen{$F[1]} = $F[2]; -' diff --git a/contrib/update.email-check b/contrib/update.email-check deleted file mode 100755 index 7610444..0000000 --- a/contrib/update.email-check +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/perl - -# Technical notes: - - # Gitolite specific script to check "author email" field of every commit - # pushed and to disallow if this email does not match the email that the - # user pushing is expected to have. - - # Use without gitolite is also possible; just substitute your access - # control system's notion of "user" for the env var GL_USER in the code - # below and probably call it "update" if you dont already have an update - # hook. - - # Mapping between "username" and "email address" is encapsulated in a - # subroutine for ease of changing; see code below. - -# Philosophical notes: - - # Doing this breaks the "D" in "DVCS", forcing all your developers to work - # to a centralised model as far as pushes are concerned. It prevents - # amending someone else's commit and pushing (this includes rebasing, - # cherry-picking, and so on, which are all impossible now). It also makes - # *any* off-line collabaration between two developers useless, because - # neither of them can push the result to the server. - - # PHBs should note that validating the committer ID is NOT the same as - # reviewing the code and running QA/tests on it. If you're not - # reviewing/QA-ing the code, it's probably worthless anyway. Conversely, - # if you *are* going to review the code and run QA/tests anyway, then you - # don't really need to validate the author email! - - # In a DVCS, if you *pushed* a series of commits, you have -- in some - # sense -- signed off on them. The most formal way to "sign" a series is - # to tack on and push a gpg-signed tag, although most people don't go that - # far. Gitolite's log files are designed to preserve that accountability - # to *some* extent, though; see contrib/adc/who-pushed for an admin - # defined command that quickly and easily tells you who *pushed* a - # particular commit. - - # Anyway, the point is that the only purpose of this script is to - # - pander to someone who still has not grokked *D*VCS - # OR - # - tick off an item in some stupid PHB's checklist - -use strict; -use warnings; - -# mapping between gitolite userid and correct email address is encapsulated in -# this subroutine; change as you like -sub email_ok -{ - my ($author_email) = shift; - my $expected_email = "$ENV{GL_USER}\@atc.tcs.com"; - return $author_email eq $expected_email; -} - -# print STDERR "SECONDARY HOOK:\n" . join(",", @ARGV, "\n"); - -my ($ref, $old, $new) = @ARGV; -for my $rev ( `git log --format="%ae\t%h\t%s" $new --not --all` ) { - chomp($rev); - my ($author_email, $hash, $subject) = split /\t/, $rev; - - die "$ENV{GL_USER}, you can't push $hash authored by $author_email\n" . - "\t(subject of commit was $subject)\n" - unless email_ok($author_email); -} - -exit 0; diff --git a/doc/virtual-refs.mkd b/doc/virtual-refs.mkd index 44f618f..9cb8db2 100644 --- a/doc/virtual-refs.mkd +++ b/doc/virtual-refs.mkd @@ -19,6 +19,17 @@ Now dev2 and dev3 cannot push changes that affect more than 9 files at a time, nor those that have more than 3 new files. It doesn't affect any other repo, nor does it affect the lead developer. +And here's one you can use right away to catch duplicate pubkeys in the admin +repo; just copy `contrib/VREF/gl-VREF-DUPKEYS` to `src` and upgrade gitolite, +then add this to conf/gitolite.conf: + + repo gitolite-admin + # ... normal rules ... + - VREF/DUPKEYS = @all + +(If you've been using the existing `contrib/update.detect-dup-pubkeys` as a +secondary update hook till now, you may want to switch to this method.) + ---- [[TOC]] @@ -70,6 +81,14 @@ Making fallthru be a "fail" forces you to add rules for all users, instead of just the ones who should have those extra checks. Worse, since every virtual ref involves calling an external program, many of these calls may be wasted. +There's another advantage to doing it this way: a VREF can choose to simply +die if things look bad, and it will have the same effect, assuming you used +the VREF only in [deny][] rules. + +This in turn means any existing update hook can be used as a VREF *as-is*, as +long as it (a) prints nothing on success and (b) dies on failure. See the +email-check and dupkeys examples later. + (Yes, I should have used this logic for [NAME][] also. What can I say -- I am older and wiser now. Sadly, we can't change [NAME][] without breaking a lot of existing configs, so it stays like a real ref -- fallthru is failure). @@ -176,35 +195,75 @@ or none at all, as the case may be). So even if it was invoked using the first rule, it might pass back (to gitolite) a virtual ref saying 'VREF/TIME/HOLIDAY', which would promptly cause the request to be denied. -## other possible examples +## VREFs shipped in contrib -I use these. Don't analyse the numbers -- I fully expect to tune them as time -passes; the idea is the main thing. +### number of new files - * if a dev pushes more than 2 *new* files, the top commit needs to have a - signed-off by line in its commit message. For example if he has 4 new - files this text should be: +If a dev pushes more than 2 *new* files, the top commit needs to have a +signed-off by line in its commit message. For example if he has 4 new files +this text should be: - 4 new files signed-off by: + 4 new files signed-off by: - The config entry for this is this (NO_SIGNOFF applies only to, and thus - implies, NEWFILES). This applies to everyone except me ;-) +The config entry for this is below (`NO_SIGNOFF` applies only to, and thus +implies, `NEWFILES`): - RW+ VREF/COUNT/2/NO_SIGNOFF = sitaram - - VREF/COUNT/2/NO_SIGNOFF = @all + RW+ VREF/COUNT/2/NO_SIGNOFF = sitaram + - VREF/COUNT/2/NO_SIGNOFF = @all - Notice how the refex in both cases is *exactly* the same. If you make it - different (even change the number on my access line), things won't work. +Notice how the refex in both cases is *exactly* the same. If you make it +different (even change the number on my access line), things won't work. - * junior devs can't push more than 10 new files, even with a signed-off by - line: +Junior devs can't push more than 10 new files, even with a signed-off by line: - - VREF/COUNT/10/NEWFILES = @junior_devs + - VREF/COUNT/10/NEWFILES = @junior_devs - * we also need to catch auto-generated files that have filename extensions - that cannot be ".ignore" +### advanced filetype detection - - VREF/FILETYPE/AUTOGENERATED = @all +Note: this is more for illustration than use; it's rather specific to one of +the projects I manage but the idea is the important thing. + +Sometimes a file has a standard extension (that cannot be 'gitignore'd), but +it is actually automatically generated. Here's one way to catch it: + + - VREF/FILETYPE/AUTOGENERATED = @all + +You can look at `contrib/VREF/gl-VREF-FILETYPE` to see how it handles the +'AUTOGENERATED' option. You could also have a more generic option, like +perhaps BINARY, and handle that in the FILETYPE vref too. + +### checking author email + +Some people want to ensure that "you can only push your own commits". + +If you force it on everyone, this is a very silly idea (see "Philosophical +Notes" section of `contrib/VREF/gl-VREF-EMAIL_CHECK`). + +But there may be value in enforcing it just for the junior developers. + +The neat thing is that the existing `contrib/update.email-check` was just +copied to `contrib/VREF/gl-VREF-EMAIL_CHECK` and it works, because VREFs get +the same first 3 arguments and those are all that it cares about. (Note: you +have to change one subroutine in that script if you want to use it) + +### catching duplicate pubkeys + +We covered this as a teaser example at the start. + +## other ideas -- code welcome! + +### "no non-merge first-parents" + +Shruggar on #gitolite wanted this. Possible code to implement it would be +something like this (untested) + + [ -z "$(git rev-list --first-parent --no-merges $2..$3)" ] + +This can be implemented using `contrib/VREF/gl-VREF-MERGE_CHECK` as a model. +That script does what the 'in core' feature called [merge check][mergecheck] +does, although the syntax to be used in conf/gitolite will be quite different. + +### other ideas for VREFs Here are some more ideas: @@ -215,6 +274,7 @@ Here are some more ideas: output) * time of day/day of week (see example snippet somewhere above) * IP address + * phase of the moon Note that pretty much anything that involves `$oldsha..$newsha` will have to deal with the issue that when you push a new tag or branch, the "old" part diff --git a/hooks/common/update.secondary.sample b/hooks/common/update.secondary.sample deleted file mode 100644 index cc6ac98..0000000 --- a/hooks/common/update.secondary.sample +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/sh - -# driver script to run multiple update "hooklets". Each "hooklet" performs a -# specific (possibly site-local) check, and they *all* have to succeed for the -# push to succeed. - -# HOW TO USE: - -# (1) rename this file to remove the .sample extension -# (2) make the renamed file executable (chmod +x) -# (3) put it in ~/.gitolite/hooks/common on the server -# (4) create a directory called update.secondary.d in the same place -# (5) copy all the update "hooklets" you want (like update.detect-dup-pubkeys) -# from contrib or wherever, to that directory -# (6) make them also executable (chmod +x) -# (7) run gl-setup - -# rules for writing a hooklet are in the sample hooklet called -# "update.detect-dup-pubkeys" in contrib - -# ---- - -# NOTE: a hooklet runs under the same assumptions as the 'update' hook, so the -# starting directory must be maintained and arguments must be passed on. - -[ -d hooks/update.secondary.d ] || exit 0 - -# all output from these "hooklets" must go to STDERR to avoid confusing the client -exec >&2 - -for i in hooks/update.secondary.d/* -do - [ -x "$i" ] || continue - # call the hooklet with the same arguments we got - "$i" "$@" || { - # hooklet failed; we need to log it... - echo hooklet $i failed - perl -I$GL_BINDIR -Mgitolite -e "log_it('hooklet $i failed')" - # ...and send back some non-zero exit code ;-) - exit 1 - } -done - -exit 0