diff --git a/conf/example.gitolite.rc b/conf/example.gitolite.rc index 29272dc..dbde473 100644 --- a/conf/example.gitolite.rc +++ b/conf/example.gitolite.rc @@ -45,7 +45,6 @@ $GL_NO_DAEMON_NO_GITWEB = 0; # $GL_ALL_READ_ALL = 0; $GIT_PATH=""; $GL_GITCONFIG_KEYS = ""; -# $GL_GITCONFIG_WILD = 0; $GL_NO_CREATE_REPOS = 0; $GL_NO_SETUP_AUTHKEYS = 0; # $GL_WILDREPOS_DEFPERMS = 'R @all'; @@ -67,6 +66,7 @@ $SVNSERVE = ""; # PLEASE USE SINGLE QUOTES ABOVE, NOT DOUBLE QUOTES $GL_WILDREPOS_PERM_CATS = "READERS WRITERS"; # $GL_SITE_INFO = "XYZ.COM DEVELOPERS: PLEASE SEE http://xyz.com/gitolite/help first"; +# $GL_HOSTNAME = "frodo"; # read doc/mirroring.mkd COMPLETELY before setting this # ------------------------------------------------------------------------------ # rarely changed variables diff --git a/doc/gitolite.conf.mkd b/doc/gitolite.conf.mkd index f8c28b2..9cda16e 100644 --- a/doc/gitolite.conf.mkd +++ b/doc/gitolite.conf.mkd @@ -444,15 +444,21 @@ Gitolite allows you to specify access for git-daemon and gitweb. See (Thanks to teemu dot matilainen at iki dot fi) -Sometimes you want to specify `git config` settings for some of your repos. -For example, you may have a custom post-receive hook that sends an email when -a push happens, and this hook needs to know whom to send the email to, etc. +> ---- -You can set git config values by specifying something like this within a -"repo" paragraph: +> **Note**: this won't work unless the rc file has the right settings; +> please see `$GL_GITCONFIG_KEYS` in [doc/gitolite.rc.mkd][rcdoc] for +> details and security information. -example usage: if you placed a hook in hooks/common that requires -configuration information that is specific to each repo, you could do this: +> ---- + +Sometimes you want to specify `git config` settings for your repos. + +For example, say you have a custom post-receive hook that sends an email when +a push happens, and this hook looks in the config for whom to send the email +to, etc. 
+ +You can set these git config values within a "repo" paragraph: repo gitolite config hooks.mailinglist = gitolite-commits@example.tld @@ -466,11 +472,39 @@ The syntax is simple: This does either a plain "git config section.key value" (for the first 3 examples above) or "git config --unset-all section.key" (for the last -example). Other forms (--add, the `value_regex`, etc) are not supported. +example). Other forms of the `git config` command (`--add`, the +`value_regex`, etc) are not supported. -**Note**: this won't work unless the rc file has the right settings; please -see comments around the variable `$GL_GITCONFIG_KEYS` in doc/gitolite.rc.mkd -for details and security information. +> ---- + +> **WARNING**: simply deleting the config line from the `conf/gitolite.conf` +> file will *not* delete the variable from `repo.git/config`. The syntax in +> the last example is the *only* way to make gitolite execute a +> `--unset-all` operation on the given key. + +> ---- + +You can repeat the 'config' line as many times as you like, and the last +occurrence will be the one in effect. This allows you to override settings +just for one project, as in this example: + + repo @all + config gitolite.mirror.master = "frodo" + config gitolite.mirror.slaves = "sam gollum" + + repo top-secret-project + # only sam, because we don't trust gollum + config gitolite.mirror.slaves = "sam" + +The "delete config variable" syntax can also be used, if you wish: + + repo highlander # there can be only one! + config gitolite.mirror.master = + config gitolite.mirror.slaves = + +As you can see, the general idea is to place the most generic ones (`repo +@all`, or repo patterns like `repo foo.*`) first, and place more specific ones +later to override the generic settings. 
[rcdoc]: http://sitaramc.github.com/gitolite/doc/gitolite.rc.html diff --git a/doc/gitolite.rc.mkd b/doc/gitolite.rc.mkd index b54ae51..5b442f7 100644 --- a/doc/gitolite.rc.mkd +++ b/doc/gitolite.rc.mkd @@ -154,12 +154,6 @@ on feedback from my users to find or fix issues. `\\.` (two backslashes and a dot). So this is how you'd allow any keys in the "foo" category: `$GL_GITCONFIG_KEYS = "foo\\..*";` - * `$GL_GITCONFIG_WILD`, boolean, default 0 - - This setting allows gitconfig keys even for wild repos. This is an - efficiency issue more than a security issue, since this requires trawling - through all of `$REPO_BASE` looking for stuff :) - * `$GL_NO_CREATE_REPOS`, boolean, default 0 DO NOT CHANGE THIS unless you have other means to create repos and diff --git a/doc/mirroring.mkd b/doc/mirroring.mkd index 94d3170..66debf7 100644 --- a/doc/mirroring.mkd +++ b/doc/mirroring.mkd @@ -1,52 +1,65 @@ -## mirroring a gitolite setup +# mirroring gitolite servers -Mirroring git repos is essentially a one-liner. For each mirror you want to -update, you just add a post-receive hook that says +Mirroring a repo is simple in git; you just need code like this in a +`post-receive` hook in each repo: #!/bin/bash git push --mirror slave_user@mirror.host:/path/to/repo.git + # if running gitolite, the $GL_REPO variable could be useful: + # git push --mirror slave_user@mirror.host:/repo/base/path/$GL_REPO.git -But life is never that simple... - -**This document has been tested using a 3-server setup, all installed using -the "non-root" method (see doc/1-INSTALL.mkd). However, the process is -probably not going to be very forgiving of human error -- like anything that -is this deep in "system admin" territory, errors are likely to be costly. If -you're the kind who hits enter first and then thinks about what he typed, -you're in for some fun times ;-)** - -**On the plus side, everything we do is done using git commands, so things are -never *really* lost until you do a `git gc`**. 
- ----- - -**Update 2011-03-10**: I wrote this with a typical "corporate" setup in mind -where all the servers involved are owned and administered by the same group of -people. As a result, the scripts assume the servers trust each other -completely. If that is not your situation, you will have to add code into -`gl-mirror-shell` to limit the commands the remote may send. Patches welcome -:-) +For a lot of people, though, mirroring is more than just 'backup', and their +needs are complex enough that setup is hard. ---- In this document: + * why * RULE NUMBER ONE! - * things that will NOT be mirrored by this process - * conventions in this document - * setting up mirroring - * install gitolite on all servers - * generate keypairs - * setup the mirror-shell on each server - * set slaves to slave mode - * set slave server lists - * efficiency versus paranoia - * syncing the mirrors the first time - * switching over - * the return of foo - * switching back - * making foo a slave - * URLs that your users will use + * IMPORTANT cautions + * concepts and terminology + * setup and usage + * server level setup + * repository level setup + * commands to (re-)sync mirrors + * details + * the `conf/gitolite.conf` file + * redirecting pushes + * discussion + * problems with the old mirroring model + * the new mirroring model + * appendix A: example cronjob based mirroring + * appendix B: efficiency versus paranoia + +---- + + + +### why + +This document is useful if: + + * you have multiple repositories spread across multiple sites around the + country/world, and would like developers to access their local server + instead of hitting the WAN, at least for 'fetch' operations. + + * you don't want all your repos mirrored to all the servers for various + reasons, technical or otherwise (especially true when some of the mirrors + don't belong to you). 
+ + * you want some mirrors to be updated only at certain times of the day, + (with a simple command), instead of every time a push happens. + + * you don't want *one* server being the master server for *all* repos; + instead you want to choose where a repo gets "mastered" based on where the + majority of that repo's users are. + + * you might even, if your servers are all in your control, want the + convenience of them *pushing to a mirror*, and having the push redirect + transparently to the master server. + +As you can see, this is a bit more than a backup solution ;-) @@ -62,285 +75,586 @@ Corollary: if the primary went down and you effected a changeover, you must make sure that the primary does not come up in a push-enabled mode when it recovers. - + -### things that will NOT be mirrored by this process +### IMPORTANT cautions -Let's get this out of the way. This procedure will only mirror your git -repositories, using `git push --mirror`. Therefore, certain files will not be -mirrored: + * For reasons given in the 'discussion' section later, the mirroring process + will never *create* a repo on the receiving side. It has to exist, and be + willing to accept pushes from the master. - * gitolite log files - * "gl-creator" and "gl-perms" files - * "projects.list", "description", and entries in the "config" files within - each repo + In particular, this means that repositories created by end-users ("wild" + repos) *need to be explicitly created* on the mirror (preferably by the + same user, assuming his ssh key works there as well). Once the repo has + been created on the slave, subsequent pushes will be mirrored correctly. -None of these affect actual repo contents of course, but they could be -important, (especially the gl-creator, although if your wildcard pattern had -"CREATOR" in it you can recreate those files easily enough anyway). + * This process will *only* mirror your git repositories, using `git push + --mirror`. 
It will *not* mirror log files, and repo-specific files like + `gl-creator` and `gl-perms` files, or indeed anything that was manually + created or added (for example, custom config entries added manually + instead of via gitolite). -Your best bet is to use rsync for the log files, and tar for the others, at -regular intervals. + None of these affect actual repo contents of course, but they could be + important, (especially the gl-creator, although if your wildcard pattern + had "CREATOR" in it you can recreate those files easily enough anyway). - + * This document has been tested using a 3-server setup, all installed using + the *non-root* method (see doc/1-INSTALL.mkd). However, the process is + probably not going to be very forgiving of human error -- like anything + that is this deep in "system admin" territory, errors are likely to be + costly. If you're the kind who hits enter first and then thinks about + what he typed, you're in for some fun times ;-) -### conventions in this document + On the plus side, everything we do is done using git commands, so things + are never *really* lost until you do a `git gc`. -The userid hosting gitolite is `gitolite` on all machines. The servers are -foo, bar, and baz. At the beginning, foo is the master, the other 2 are -slaves. + * Mirroring has *not* been, and will not be, tested with gitolite installed + using the deprecated 'from-client' method. Please use one of the other + methods. - + * Also, this has *not* been tested with smart-http. I'm not even sure it'll + work; http is very fiddly to get right. If you want mirroring, at least + your server-to-server comms should be over ssh. -### setting up mirroring + * Finally, this method uses repo-specific `git config` variables to store + the mirroring information. Please read the **WARNING** in the + documentation on [git config commands][rsgc] if you wish to **delete** one + of those lines. 
- +[rsgc]: http://sitaramc.github.com/gitolite/doc/gitolite.conf.html#_repo_specific_git_config_commands -#### install gitolite on all servers + - * before running the final step in the install sequence, make sure you go to - the `hooks/common` directory and rename `post-receive.mirrorpush` to - `post-receive`. See doc/hook-propagation.mkd if you're not sure where you - should look for `hooks/common`. +### concepts and terminology - * if the server already has gitolite installed, use the normal methods to - make sure this hook gets in. +Servers can host 3 kinds of repos: master, slave, and local. - * Use the same "admin key" on all the machines, so that the same person has - gitolite-admin access to all of them. + * A repo can be a **master** on one and only one server. A repo on its + "master" server is a **native** repo, on slaves it is "non-native". - + * A **slave** repo cannot be pushed to by a user. It will only accept + pushes from a master server. (But see later for an exception). -#### generate keypairs + * A **local** repo is not involved in mirroring at all, in either direction. -Each server will be potentially logging on to one or more of the other -servers, so first generate keypairs on each of them (`ssh-keygen`) and copy -the `.pub` files to all other servers, named appropriately. So foo will have -bar.pub and baz.pub, etc. + - +### setup and usage -#### setup the mirror-shell on each server + -XXX review this document after testing mirroring... +#### server level setup -If you installed gitolite using the from client method, run the following: +To start with, assign each server a short name. We will use 'frodo', 'sam', +and 'gollum' as examples here. - # on foo - export GL_BINDIR=$HOME/.gitolite/src - cat bar.pub baz.pub | - sed -e 's,^,command="'$GL_BINDIR'/gl-mirror-shell" ,' >> ~/.ssh/authorized_keys +1. Generate ssh keys on each machine. Copy the `.pub` files to all other + machines with the appropriate names. 
I.e., frodo should have sam.pub and + gollum.pub, etc. -If you installed using any of the other 3 methods do this: +2. Install gitolite on all servers, under some 'hosting user' (we'll use + `git` in our examples here). You need not use the same hosting user on + all machines. - # on foo - export GL_BINDIR=`gl-query-rc GL_BINDIR` - cat bar.pub baz.pub | - sed -e 's,^,command="'$GL_BINDIR'/gl-mirror-shell" ,' >> ~/.ssh/authorized_keys + It is not necessary to use the same "admin key" on all the machines. + However, if you do plan to mirror the gitolite-admin repo also, they will + eventually become the same anyway. In our example, frodo does mirror the + admin repo to sam, but not to gollum. (Can you really see frodo or sam + trusting gollum?) -Also do the same thing on the other machines. +3. Now copy `hooks/common/post-receive.mirrorpush` from the gitolite source, + and install it as a custom hook called `post-receive`; see [here][ch] for + instructions. -Now test this access: +4. Edit `~/.gitolite.rc` on each machine and add/edit the following lines. + The `GL_HOSTNAME` variable **must** have the correct name for that host + (frodo, sam, or gollum), so that will definitely be different on each + server. The other line can be the same, or may have additional patterns + for other `git config` keys you have previously enabled. See [here][rsgc] + and the description for `GL_GITCONFIG_KEYS` in [this][vsi] for details. - # on foo - ssh gitolite@bar pwd - # should print /home/gitolite/repositories - ssh gitolite@bar uname -a - # should print the appropriate info for that server + $GL_HOSTNAME = 'frodo'; # will be different on each server! + $GL_GITCONFIG_KEYS = "gitolite.mirror.*"; -Similarly test the other combinations. + (Remember the "rc" file is NOT mirrored; it is meant to be site-local). - + Note: if `GL_HOSTNAME` is undefined, you cannot push to repos which have + the 'gitolite.mirror.master' config variable set. 
(See 'details' section + below for more info on this variable). -#### set slaves to slave mode +5. On each machine, add the keys for all other machines. For example, on + frodo you'd run these two commands: -Set slave mode on all the *slave* servers by setting `$GL_SLAVE_MODE = 1` -(uncommenting the line if necessary). + gl-tool add-mirroring-peer sam.pub + gl-tool add-mirroring-peer gollum.pub -Leave the master server's file as is. +6. Create "host" aliases on each machine to refer to all other machines. See + [here][ha] for what/why/how. - + The host alias for a host (in other machines' `~/.ssh/config` files) MUST + be the same as the `GL_HOSTNAME` in the referred host's `~/.gitolite.rc`. + Gitolite mirroring **requires** this consistency in naming; things will + NOT work otherwise. -#### set slave server lists + For example, if machine A's `~/.gitolite.rc` says `$GL_HOSTNAME = + 'frodo';`, then all other machines must use a host alias of "frodo" in + their `~/.ssh/config` files to refer to machine A. -On the master (foo), set the names of the slaves by editing the -`~/.gitolite.rc` to contain: +Once you've done this, each host should be able to reach the other hosts and +get a response back. For example, running this on sam: - $ENV{GL_SLAVES} = 'gitolite@bar gitolite@baz'; + ssh frodo info -**Note the syntax well; this is critical**: +should get you - * **this must be in single quotes** (or you must remember to escape the `@`) - * the variable is an ENV var, not a plain perl var - * the values are *space separated* - * each value represents the userid and hostname for one server + Hello sam, I am frodo. -The basic idea is that this string, should be usable in both the following -syntaxes: +Check this command from *everywhere to everywhere else*, and make sure you get +expected results. **Do NOT proceed otherwise.** - git clone gitolite@bar:repo - ssh gitolite@bar pwd + -You can also use ssh host aliases. 
Let's say server "bar" has a non-standard -port number: +#### repository level setup - # in ~/.ssh/config on foo - host mybar - hostname bar - user gitolite - port 2222 +Setting up mirroring at the repository level instead of at the "entire server" +level gives you a lot of flexibility (see "discussion" section below). - # in ~/.gitolite.rc on foo - $ENV{GL_SLAVES} = 'bar gitolite@baz'; +The basic idea is to use `git config` variables within each repo (gitolite +allows you to create them from within the gitolite.conf file so that's +convenient), and use these to specify which machine is the master and which +machines are slaves for the repo. -And that's really all there is, unless... + - +> Side note: if you just want to **simulate the old mirroring scheme**, +> despite its limitations, it's very easy. Say frodo is the master for all +> repos, and the other 2 are slaves. Just clone the gitolite-admin repos of +> all servers, add these lines to the top of each: -### efficiency versus paranoia + repo @all + config gitolite.mirror.master = "frodo" + config gitolite.mirror.slaves = "sam gollum" + +> then commit, and push all 3. Finally, make a dummy commit on just the +> frodo clone and push again. At this point you can do a one-time manual +> sync (see Appendix A) if you wish but otherwise you're done. + + + +Let's say frodo and sam are internal servers, while gollum is an external (and +therefore less trusted) server that has agreed to help us out by mirroring one +of our high traffic repos. We want the following setup: + + * the "gitolite-admin" repo, as well as an internal project repo called + "ip1", should be mastered on frodo and mirrored to sam. + + * internal project "ip2" has almost all of its developers closer to sam, so + it should be mastered there, and mirrored on frodo. + + * an open source project we manage, "os1", should be mastered on frodo and + mirrored on both sam and gollum. + +So here's how our example would go: + +1. 
Clone frodo's and sam's gitolite-admin repos to your workstation, then add + the following lines to both their gitolite.conf files: + + repo ip1 gitolite-admin + config gitolite.mirror.master = "frodo" + config gitolite.mirror.slaves = "sam" + + repo ip2 + config gitolite.mirror.master = "sam" + config gitolite.mirror.slaves = "frodo" + + You also need normal access control lines for ip1 and ip2; I'm assuming + you already have them elsewhere, at least on frodo. (What you have on sam + won't matter in a few minutes, as you will see!) + + Commit and push these changes. + +2. There are a couple of quirks to keep in mind when you make changes to the + gitolite-admin repo's config. + + * the first push will create the `git config` entries required, but by + then it is too late to *act* on them; i.e., actually do the mirroring. + If there were any older values, like a different list of slaves + perhaps, then those would be in effect. + + This is largely because git invokes post-receive before post-update. + In theory I can work around this but I do not intend to. + + Anyway, this means that after the 2 pushes, you have to make a dummy + push from frodo: + + git commit --allow-empty -m empty; git push + + which gets you something like this amidst the other messages: + + remote: (25158&) frodo ==== (gitolite-admin) ===> sam + + telling you that frodo is sending gitolite-admin to sam in the + background. + + * the second quirk is that your clone of server sam's gitolite-admin + repo is now completely out of date, since frodo has overwritten it on + the server. You have to 'cd' to that clone and do this: + + git fetch + git reset --hard origin/master + +2. That completes the setup of the gitolite-admin and the internal project + repos. We'll now setup things for the open source project, "os1". 
+ + On frodo's gitolite-admin clone, add the following lines to + `conf/gitolite.conf`, then commit and push: + + repo os1 + config gitolite.mirror.master = "frodo" + config gitolite.mirror.slaves = "sam gollum" + + Also, send the same lines to gollum's administrator and ask him to add + them into his conf/gitolite.conf file, commit, and push. + + + +#### commands to (re-)sync mirrors + +You don't have to put all the slaves in `gitolite.mirror.slaves`. For +example, let's say you have some repos that are very active, and two of your +mirrors that are halfway across the world are getting pushed very frequently. +But you don't need those mirrors to be that closely updated, perhaps *because* +they are halfway across the world and those guys are asleep ;-) + +Or maybe there was a network glitch and even the default slaves are now +lagging, so they need to be manually synced. + +Or a slave realised that one of its repos is lagging for some reason, and +wants to request an immediate update. + +Whatever the reason, you need ways to sync a repo from a command line. Here +are ways to do that: + +1. On the master server, you can start a **background** job to mirror a repo. + The command/syntax is + + gl-mirror-shell request-push reponame [list of keys/slaves] + + The list at the end is optional, and can be a mix of slave names or your + own gitolite mirror config keys. (Yes, you can have any key, named + anything you like, as long as it starts with `gitolite.mirror.`). + + If the list is not supplied, the `gitolite.mirror.slaves` key is used. + + Keys can have values that in turn contain a list of keys/slaves. The list + is recursively *expanded* but recursion is not *detected*. Order is + preserved while duplicates are removed. If you didn't get that, see the + example :-) + + **Warning**: the `gitolite.mirror.slaves` key should have only hosts, no + keys, in it. 
+ + The program exits with a return value of "1" if it found no slaves in the + list passed, otherwise it fires off the background job, prints an + informative message, and exits with a return value of "0". + + We'll take an example. Let's say your gitolite config file has this: + + repo ip1 + config gitolite.mirror.master = "frodo" + config gitolite.mirror.slaves = "sam merry pippin" + config gitolite.mirror.hourly = "sam legolas" + config gitolite.mirror.nightly = "gitolite.mirror.hourly gimli" + config gitolite.mirror.all = "gitolite.mirror.nightly gitolite.mirror.hourly gitolite.mirror.slaves" + + Then the following commands have the results described in comments: + + gl-mirror-shell request-push ip1 + # which is the same as: + gl-mirror-shell request-push ip1 gitolite.mirror.slaves + # pushes to sam, merry, pippin + + gl-mirror-shell request-push ip1 gollum + # pushes only to gollum. Note that gollum is not a member of any of + # the slave lists we defined. + + gl-mirror-shell request-push ip1 gitolite.mirror.slaves gollum + # pushes to sam, merry, pippin, gollum + + gl-mirror-shell request-push ip1 gitolite.mirror.slaves gitolite.mirror.hourly + # pushes to sam, merry, pippin, legolas + + gl-mirror-shell request-push ip1 gitolite.mirror.all + # pushes to sam, legolas, gimli, merry, pippin + + The last two examples show recursive expansion with order-preserving + duplicate removal (hey there's now a published conference paper on + gitolite, so we have to use jargon *somewhere* or they won't accept + follow-on papers!). + + If you do something like this: + + config gitolite.mirror.nightly = "gimli gitolite.mirror.nightly" + + or this: + + config gitolite.mirror.nightly = "gimli gitolite.mirror.hourly" + config gitolite.mirror.hourly = "legolas gitolite.mirror.nightly" + + you deserve what you get. + +2. If you want to start a **foreground** job, the syntax is `gl-mirror-shell + request-push ip1 -fg gollum`. 
Foreground mode requires one (and only one) + slave name -- you cannot send to an implicit list, nor to more than one + slave. + +3. Cronjobs and custom mirroring schemes are now very easy to do. Use either + of the command forms above and write a script around it. Appendix A + contains an example setup. + +4. Once in a while a slave will realise it needs an update, and wants to ask + for one. It can run this command to do so: + + ssh sam request-push ip2 + + If the requesting server is not one of the slaves listed in the config + variable gitolite.mirror.slaves on the master, it will be rejected. + + This is always a foreground push, reflecting the fact that the slave may + want to know why their push errored out or didn't work last time or + whatever. + + + +### details + + + +#### the `conf/gitolite.conf` file + +One goal I have is to minimise the code changes to "core" gitolite due to +this, so all repo-specific mirror settings are stored as `git config` +variables (you know you can specify git config variables in the gitolite +config file right?). These are: + + * `gitolite.mirror.master` + + The name of the server which is the master for this repo. Each server + will compare this with `$GL_HOSTNAME` (from its own rc file) to + determine if it's the master or a slave. Here're the possible values: + + * **undefined** or `local`: this repo is local to this server + * **same** as `$GL_HOSTNAME`: this server is the "master" for this + repo. (The repo is "native" to this server). + * **not same** as `$GL_HOSTNAME`: this server is a "slave" for the + repo. (The repo is "non-native" on this server). + + * `gitolite.mirror.slaves` + + Ignored for non-native repos. For native repos, this is a space-separated + list of servers to push to from the `post-receive` hook. + + Clearly, you can have different sets of slaves for different repos (again, + see "discussion" section later for more on this). 
+ + * `gitolite.mirror.redirectOK` + + See the section on "redirecting pushes". + + * In addition, you can create your own slave lists, named whatever you want, + except they have to start with `gitolite.mirror.`. The section on + "commands to (re-)sync mirrors" has some examples. + + + +#### redirecting pushes + +**Please read carefully; there are security implications if you enable this +for mirrors NOT under your control**. + +When a user pushes to a non-native repo, it is possible to transparently +redirect the push to the correct master server. This is a very neat feature, +because now all your users just use one URL (the mirror nearest to them). +They don't need to know where the actual master is, and more importantly, if +you and the other admins change it, they don't need to know it changed! + +The `gitolite.mirror.redirectOK` config variable decides where this +redirection is OK. If it is set to 'true', any valid 'slave' can redirect an +incoming non-native push from a developer. Otherwise, it contains a list of +slaves that are permitted to redirect pushes (this might happen if you don't +trust some of your slaves enough to accept a redirected push from them). + +This check needs to pass on both the master and slave servers; both have a say +in deciding if this is allowed. (The master may have real reasons not to +allow this; see below. I cannot think of any real reason for the *slave* to +disable this, but it's there in case some admin doesn't like it). + +There are some potential issues that you MUST consider before enabling this: + + * (security) If the slave and master server are so different or autonomous + that a user, say "alice", on the slave is not guaranteed to be the same + one as "alice" on the master, then the master admin should NOT enable this + feature. + + This is because, in this scheme, authentication happens on the slave, but + authorisation is on the master. The slave-authenticated userid (alice) is + passed to the master. 
+ + (If you know ssh well enough, you know that the ssh authentication has + already happened, so all we can do is ensure authorisation happens with + whatever username we know so far). + + * If your slave is out of sync with the master for whatever reason, then the + user will get confusing results. A `git fetch` may say everything is + up-to-date but the push fails saying it is not a fast-forward push. (Of + course there's a way to fix this; see the "commands to (re-)sync mirrors" + section above). + + * We cannot redirect non-git commands like ADC, setperms, etc., because we + don't really have a way of knowing which repo the user is talking about (different + commands have different syntaxes, some have more than one reponame...). + Any user who needs to do that should access the end server directly. It + should be easy enough to write an ADC to do the forwarding, in case the + slave server is the only one that can reach the real master due to network + or firewall setup. + + Ideally, I recommend that ad hoc repos not be mirrored at all. Keep + mirroring for "blessed" repos only. + + + +### discussion + + + +#### problems with the old mirroring model + +The old mirroring model had a single server as the master for *all* +repositories. Slaves were effectively only for load-balancing reads, or for +failover if the master died. + +This is not good enough for corporate setups where the developers are spread +fairly evenly across the world. Some repos need to be closer to some teams +(NUMA is a good analogy). + +A model where different repos are "mastered" in different cities is much more +efficient here. + +The old model had other rigidities too, though they're not really *problems*, +as such: + + * the slaves are just slaves; they can't have any "local" repos. + + * a slave had to carry *all* repos; it couldn't choose to carry just a + subset. + + * it implicitly assumed all the mirrors were under the same admin, and that + the gitolite-admin repo was itself mirrored too. 
+ + + +#### the new mirroring model + +In the new model, servers can be much more independent and autonomous than in +the old model. (Don't miss the side note in the 'repository level setup' +section if you prefer the old model). + +The new model has a few pros and cons. The pros come from the flexibility and +freedom that mirror servers get, and the cons come from authorisation being +more rigorously checked (for example, a slave will only accept a push if *its* +configuration also says that the sending server is indeed the master for this +repo). + + * A mirroring operation will not *create* a repo on the mirror; it has to + exist before a push happens on the master. Typically, the admin on the + slave must create the repo by adding the appropriate lines in his config. + + If your setup is not autonomous (i.e., you're mirroring the admin repo as + well) then this happens automatically for normal repos. However, + *wildcard repos still won't work as seamlessly as in the old model*; see + the first bullet in the 'IMPORTANT cautions' section earlier. + + * The gitolite-admin repo (and config) need not be mirrored. This allows + the slave server admin to create site-local repos, without forcing him to + create a second gitolite install for them. + + (Site-local repos are useful for purely local projects that need + not/should not be mirrored for some reason, or ad-hoc personal repos that + developers create for themselves, etc.) + + * Servers can choose to mirror a subset of the repos from one of the bigger + servers. + + In the open source world, you can imagine more popular repos (or more + popular parts of huge projects like KDE) having more mirrors. Or + substitute "more popular" with "larger in size" if you wish + (FlightGear-data anyone?) + + In the corporate world it could help with jurisdiction issues if the + mirror is in a different country with different laws. + + I'm sure people will find other uses for this. 
And I'm *positive* the + pros will outweigh the cons. If you don't like it, follow the suggestion + in the side note somewhere up above, and just forget this feature exists + :-) + +---- + + + +### appendix A: example cronjob based mirroring + +Let's say you have some repos that are very active. You're pushing halfway +across the world every few seconds, but those slaves do not need to be that closely +updated, perhaps *because* they are halfway across the world and those guys +are asleep ;-) + +You'd like to update them once an hour instead. Here's how you might do that. + +First add this line to the configuration for those repos: + + config gitolite.mirror.hourly = "slave1 slave2 slave3" + +Then write a cron job that looks like this (untested). + + #!/bin/bash + + REPO_BASE=`${0%/*}/gl-query-rc REPO_BASE` + + cd $REPO_BASE + find . -type d -name "*.git" -prune | while read r + do + # get reponame as gitolite knows it + r=${r:2} + r=${r%.git} + + gl-mirror-shell request-push $r gitolite.mirror.hourly + + # that command backgrounds the push, so you'd best wait a few seconds + # before hitting the next one, otherwise you'll have all your repos + # going out at once! + sleep 10 + done + + + +### appendix B: efficiency versus paranoia If you're paranoid enough to use mirrors, you should be paranoid enough to -like the `receive.fsckObjects` setting we now default to :-) However, informal -tests indicate a 40-50% CPU overhead from this. If you don't like that, -remove that line from the post-receive code. +use the `receive.fsckObjects` setting. However, informal tests indicate a +40-50% CPU overhead from this. If you're ok with that, make the appropriate +adjustments to `GL_GITCONFIG_KEYS` in the rc file, then add this to your +gitolite.conf file: -Please also note that we only set it on mirrors, and that too at the time the -mirrored repo is *created*. 
This means, when you start using your old "main" -server as a mirror (see later sections on switching over to a mirror, etc.), -it's repos do not have this setting. Repos created by previous versions of -gitolite also will not have this setting. + repo @all + config receive.fsckObjects = "true" Personally, I just set `git config --global receive.fsckObjects true`, since those servers aren't doing anything else anyway, and are idle for long stretches of time. It's upto you what you want to do here. - +[ch]: http://sitaramc.github.com/gitolite/doc/2-admin.html#_custom_hooks +[ha]: http://sitaramc.github.com/gitolite/doc/ssh-troubleshooting.html#_appendix_4_host_aliases +[rsgc]: http://sitaramc.github.com/gitolite/doc/gitolite.conf.html#_repo_specific_git_config_commands +[vsi]: http://sitaramc.github.com/gitolite/doc/gitolite.rc.html#_variables_with_a_security_impact -### syncing the mirrors the first time - -This is fine if you're setting up everything from scratch. But if your master -server already had some repos with commits on them, you have to manually sync -them up once. - - # on foo - gl-mirror-sync gitolite@bar - # path to "sync" program is ~/.gitolite/src if "from-client" install - - - -### switching over - -Let's say foo goes down. You want to make bar the main server, and continue -to have "baz" be a slave. - - * on bar, edit `~/.gitolite.rc` and set - - $GL_SLAVE_MODE = 0; - $ENV{GL_SLAVES} = 'gitolite@baz'; - - * **sanity check**: go to your gitolite-admin clone, add a remote for "bar", - fetch it, and make sure they are the same: - - git remote add bar gitolite@bar:gitolite-admin - git fetch bar - git branch -a -v - # check that all SHAs are the same - - * inform everyone of the new URL for their repos (see next section for more - on this) - - * make sure that if "foo" does come up, it will not immediately start - serving requests. 
You'll be in trouble if (a) foo comes up as it was - before, and (b) some developer still had the old URL lying around and - started pushing changes to it. - - You could jump in quickly and set `$GL_SLAVE_MODE = 1` as soon as the - system comes up. Better still, use extraneous means to block incoming - connections from normal users (out of scope for this document). - - - -### the return of foo - - - -#### switching back - -Switching back is fairly easy. - - * synchronise all repos from bar to foo. This may take some time, depending - on how long foo was down. - - # on bar - gl-mirror-sync gitolite@foo - # path to "sync" program is ~/.gitolite/src if "from-client" install - - * turn off pushes on "bar" by setting slave mode to 1 - * run the sync once again; this should complete quickly - - * **double check by comparing some the repos on both sides if needed**. You - could run the following snippet on all servers for a quick check: - - cd ~/repositories # or wherever $REPO_BASE is - find . -type d -name "*.git" | sort | - while read r - do - echo $r - git ls-remote $r | sort - done | md5sum - - * on foo, set the slave list (or check that it is correct) - * on foo, set slave mode off - * tell everyone to switch back - - - -#### making foo a slave - -If "foo" does come up in a controlled manner, you might not want to switch -back right away. Unless you're doing DNS tricks, users may be peeved at -having to do 2 switches. - -If you want to make foo a slave, you know the drill by now: - - * set slave mode to 1 on foo - * on bar, add foo as a slave - - # in ~/.gitolite.rc on bar - $ENV{GL_SLAVES} = 'gitolite@foo gitolite@baz'; - -I think that should cover pretty much everything. I *have* tested most of -this, but YMMV. - ----- - - - -### URLs that your users will use - -Unless you play DNS tricks, it is more than likely that your users would have -to change the URLs they use to access their repos if you change the server -they push to. 
- -I cannot speak for the plethora of git client software out there but for -normal git, this problem can be mitigated somewhat by doing this: - - * in `~/.ssh/config` on my workstation, I have - - host gl - hostname=primary.server.ip - user=gitolite - - * all my `git clone` commands use `gl:reponame` as the URL - - * if the primary goes down, and I have to access the secondary, I just - change the `hostname` line in `~/.ssh/config`. - -That's it. Every clone of every repo used anywhere in this userid is now -changed. - -To repeat, this may or may not work with all the git clients that exist (like -jgit, or any of the GUI tools, and especially if you're on Windows). - -If anyone has a better idea, something that works more universally, I'd love -to hear it. diff --git a/hooks/common/post-receive.mirrorpush b/hooks/common/post-receive.mirrorpush index 6be6d81..6914f0e 100755 --- a/hooks/common/post-receive.mirrorpush +++ b/hooks/common/post-receive.mirrorpush @@ -8,19 +8,17 @@ # if you don't do this, git-shell sometimes dies of a signal 13 (SIGPIPE) [ -t 0 ] || cat >/dev/null -if [ -n "$GL_SLAVES" ] -then - for mirror in $GL_SLAVES - do - if git push --mirror $mirror:$GL_REPO.git - then - : - else - ssh $mirror mkdir -p $GL_REPO.git - ssh $mirror git init --bare $GL_REPO.git - ssh $mirror "cd $GL_REPO.git; git config receive.fsckObjects true" - git push --mirror $mirror:$GL_REPO.git || - echo "WARNING: mirror push to $mirror failed" - fi - done -fi >&2 +# even slaves have post-receive hooks, but due to the way the push happens, we +# don't have GL_REPO set. So we detect that generic situation and bail... 
+[ -n "$GL_BYPASS_UPDATE_HOOK" ] && exit 0 +# CAUTION: this means that a server-side push (bypassing gitolite) will not be +# mirrored automatically because (a) we don't know GL_REPO (we can deduce it +# but we won't!), and (b) we can't distinguish easily between that and this +# case (the slave receiving a mirror push case) + +[ -z "$GL_REPO" ] && die GL_REPO not set +[ -z "$GL_BINDIR" ] && die GL_BINDIR not set + +slaves=`git config --get gitolite.mirror.slaves` +[ -z "$slaves" ] && exit 0 +$GL_BINDIR/gl-mirror-push $GL_REPO $slaves diff --git a/src/gitolite.pm b/src/gitolite.pm index 191cbfa..3c6e65d 100644 --- a/src/gitolite.pm +++ b/src/gitolite.pm @@ -22,11 +22,16 @@ use Exporter 'import'; setup_git_configs setup_gitweb_access shell_out + slurp special_cmd try_adc wrap_chdir wrap_open wrap_print + + mirror_mode + mirror_listslaves + mirror_redirectOK ); @EXPORT_OK = qw( %repos @@ -159,7 +164,8 @@ sub log_it { $logmsg .= "\t@_" if @_; # erm... this is hard to explain so just see the commit message ok? $logmsg =~ s/([\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\xFF]+)/sprintf "<>","",$1/ge; - print $log_fh "$ENV{GL_TS}\t$ENV{GL_USER}\t$ip\t$logmsg\n"; + my $user = $ENV{GL_USER} || "(no user)"; + print $log_fh "$ENV{GL_TS}\t$user\t$ip\t$logmsg\n"; close $log_fh or die "close log failed: $!\n"; } @@ -420,12 +426,24 @@ sub setup_git_configs { my ($repo, $git_configs_p) = @_; - while ( my ($key, $value) = each(%{ $git_configs_p->{$repo} }) ) { - if ($value ne "") { - $value =~ s/^"(.*)"$/$1/; - system("git", "config", $key, $value); - } else { - system("git", "config", "--unset-all", $key); + # new_wild calls us without checking! 
+ return unless $git_configs_p->{$repo}; + + # git_configs_p is a ref to a hash whose elements look like + # {"reponame"}{sequence_number}{"key"} = "value"; + + my %rch = %{ $git_configs_p->{$repo} }; + # %rch has elements that look like {sequence_number}{"key"} = "value" + for my $seq (sort { $a <=> $b } keys %rch) { + # and the final step is the repo config: {"key"} = "value" + my $rc = $rch{$seq}; + while ( my ($key, $value) = each(%{ $rc }) ) { + if ($value ne "") { + $value =~ s/^"(.*)"$/$1/; + system("git", "config", $key, $value); + } else { + system("git", "config", "--unset-all", $key); + } } } } @@ -1182,6 +1200,49 @@ sub ext_cmd_svnserve die "svnserve exec failed\n"; } +# ---------------------------------------------------------------------------- +# MIRRORING HELPERS +# ---------------------------------------------------------------------------- + +sub mirror_mode { + my $repo = shift; + + # 'local' is the default if the config is empty or not set + my $gmm = `git config --file $REPO_BASE/$repo.git/config --get gitolite.mirror.master` || 'local'; + chomp $gmm; + return 'local' if $gmm eq 'local'; + return 'master' if $gmm eq ( $GL_HOSTNAME || '' ); + return "slave of $gmm"; +} + +sub mirror_listslaves { + my $repo = shift; + + return ( `git config --file $REPO_BASE/$repo.git/config --get gitolite.mirror.slaves` || '' ); +} + +# is a redirect ok for this repo from this slave? 
+sub mirror_redirectOK { + my $repo = shift; + my $slave = shift || return 0; + # if we don't know who's asking, the answer is "no" + + my $gmrOK = `git config --file $REPO_BASE/$repo.git/config --get gitolite.mirror.redirectOK` || ''; + chomp $gmrOK; + my $slavelist = mirror_listslaves($repo); + + # if gmrOK is 'true', any valid slave can redirect + return 1 if $gmrOK eq 'true' and $slavelist =~ /(^|\s)$slave(\s|$)/; + # otherwise, gmrOK is a list of slaves who can redirect + return 1 if $gmrOK =~ /(^|\s)$slave(\s|$)/; + + return 0; + + # LATER/NEVER: include a call to an external program to override a 'true', + # based on, say, the time of day or network load etc. Cons: shelling out, + # deciding the name of the program (yet another rc var?) +} + # ------------------------------------------------------------------------------ # per perl rules, this should be the last line in such a file: 1; diff --git a/src/gitolite_rc.pm b/src/gitolite_rc.pm index f055f65..01bcba2 100644 --- a/src/gitolite_rc.pm +++ b/src/gitolite_rc.pm @@ -17,12 +17,13 @@ use Exporter 'import'; $ADMIN_POST_UPDATE_CHAINS_TO $ENV $GITOLITE_BASE $GITOLITE_PATH $GIT_PATH $GL_ADC_PATH $GL_ADMINDIR $GL_ALL_INCLUDES_SPECIAL $GL_ALL_READ_ALL $GL_BIG_CONFIG $GL_CONF $GL_CONF_COMPILED $GL_GET_MEMBERSHIPS_PGM - $GL_GITCONFIG_KEYS $GL_GITCONFIG_WILD $GL_KEYDIR $GL_LOGT $GL_NICE_VALUE + $GL_GITCONFIG_KEYS $GL_KEYDIR $GL_LOGT $GL_NICE_VALUE $GL_NO_CREATE_REPOS $GL_NO_DAEMON_NO_GITWEB $GL_NO_SETUP_AUTHKEYS $GL_PACKAGE_CONF $GL_PACKAGE_HOOKS $GL_PERFLOGT $GL_SITE_INFO $GL_SLAVE_MODE $GL_WILDREPOS $GL_WILDREPOS_DEFPERMS $GL_WILDREPOS_PERM_CATS $HTPASSWD_FILE $PROJECTS_LIST $REPO_BASE $REPO_UMASK $RSYNC_BASE $SVNSERVE $UPDATE_CHAINS_TO $AUTH_OPTIONS + $GL_HOSTNAME $GL_HTTP_ANON_USER ); @@ -31,7 +32,7 @@ use Exporter 'import'; # real constants # ------------------------------------------------------------------------------ -$current_data_version = '1.7'; +$current_data_version = '2.0'; $ABRT = "\n\t\t***** 
ABORTING *****\n "; $WARN = "\n\t\t***** WARNING *****\n "; @@ -72,6 +73,11 @@ do $ENV{GL_RC} or die "error parsing $ENV{GL_RC}\n"; # fix up REPO_BASE $REPO_BASE = "$ENV{HOME}/$REPO_BASE" unless $REPO_BASE =~ m(^/); +# backward incompat detection for mirroring. Normally I wouldn't do +# this but this is *important* +die "$ABRT Mirroring has completely changed in this version.\tYou need to check the documentation for how to upgrade\n" + if (defined $GL_SLAVE_MODE or exists $ENV{GL_SLAVES}); + # ------------------------------------------------------------------------------ # per perl rules, this should be the last line in such a file: 1; diff --git a/src/gl-auth-command b/src/gl-auth-command index 60f0e40..61b2f5a 100755 --- a/src/gl-auth-command +++ b/src/gl-auth-command @@ -93,10 +93,6 @@ unless ($ENV{SSH_ORIGINAL_COMMAND}) { $ENV{SSH_ORIGINAL_COMMAND} = 'info'; } -# slave mode should not do much -die "server is in slave mode; you can only fetch\n" - if ($GL_SLAVE_MODE and $ENV{SSH_ORIGINAL_COMMAND} !~ /^(info|expand|get|git-upload-)/); - # admin defined commands; please see doc/admin-defined-commands.mkd if ($GL_ADC_PATH and -d $GL_ADC_PATH) { try_adc(); # if it succeeds, this also 'exec's out @@ -139,6 +135,19 @@ $ENV{GL_REPO}=$repo; # the real git commands (git-receive-pack, etc...) # ---------------------------------------------------------------------------- +# we know the user and repo; we just need to know what perm he's trying for +# (aa == attempted access; setting this makes some later logic simpler) +my $aa = ($verb =~ $R_COMMANDS ? 
'R' : 'W'); + +# writes may get redirected under certain conditions +if ( $aa eq 'W' and mirror_mode($repo) =~ /^slave of (\S+)/ ) { + my $master = $1; + die "$ABRT GL_HOSTNAME not set; rejecting push to non-local repo\n" unless $GL_HOSTNAME; + die "$ABRT $GL_HOSTNAME not the master, please push to $master\n" unless mirror_redirectOK($repo, $GL_HOSTNAME); + print STDERR "$GL_HOSTNAME ==== $user ($repo) ===> $master\n"; + exec("ssh", $master, "USER=$user", "SOC=$ENV{SSH_ORIGINAL_COMMAND}"); +} + # first level permissions check my ($perm, $creator, $wild); @@ -150,9 +159,6 @@ if ( $GL_ALL_READ_ALL and $verb =~ $R_COMMANDS and -d "$REPO_BASE/$repo.git") { # it was missing, and you have create perms, so create it new_wild_repo($repo, $user) if ($perm =~ /C/); -# we know the user and repo; we just need to know what perm he's trying for -# (aa == attempted access) -my $aa = ($verb =~ $R_COMMANDS ? 'R' : 'W'); die "$aa access for $repo DENIED to $user (Or there may be no repository at the given path. Did you spell it correctly?)\n" unless $perm =~ /$aa/; diff --git a/src/gl-compile-conf b/src/gl-compile-conf index 56d023c..273786f 100755 --- a/src/gl-compile-conf +++ b/src/gl-compile-conf @@ -49,8 +49,9 @@ open STDOUT, ">", "/dev/null" if (@ARGV and shift eq '-q'); # names of repos whose ACLs don't make it into the main compiled config file # copy above desc to lite.pm -- my %split_conf = (); -# rule sequence number +# rule and config sequence numbers my $rule_seq = 0; +my $config_seq = 0; # ... 
having been forced to use a list as described above, we lose some # efficiency due to the possibility of the same {ref, perms} pair showing up @@ -244,21 +245,11 @@ sub parse_conf_line die "$ABRT git config $key not allowed\ncheck GL_GITCONFIG_KEYS in the rc file for how to allow it\n" if (@matched < 1); for my $repo (@{ $repos_p }) # each repo in the current stanza { - $git_configs{$repo}{$key} = $value; + $git_configs{$repo}{$config_seq++}{$key} = $value; # force entry in %repos. Without this, a repo para with just a # config line and no ACLs gets ignored in the output $repos{$repo}{HAS_CONFIG} = 1; - - # no problem if it's a plain repo (non-pattern, non-groupname) - # OR wild configs are allowed - unless ( ($repo =~ $REPONAME_PATT and $repo !~ /^@/) or $GL_GITCONFIG_WILD) { - my @r = ($repo); # single wildpatt - @r = sort keys %{ $groups{$repo} } if $groups{$repo}; # or a group; get its members - do { - warn "$WARN git config set for $_ but \$GL_GITCONFIG_WILD not set\n" unless $_ =~ $REPONAME_PATT - } for @r; - } } } # include diff --git a/src/gl-mirror-push b/src/gl-mirror-push new file mode 100755 index 0000000..2ae9c80 --- /dev/null +++ b/src/gl-mirror-push @@ -0,0 +1,83 @@ +#!/bin/sh + +# arguments: reponame, list of slaves + +# optional flag after reponame: "-fg" to run in foreground. This is only +# going to be given by one specific invocation, and if given will only work +# for one slave. + +# if list of slaves not given, get it from '...slaves' config + +die() { echo gl-mirror-push${hn:+ on $hn}: "$@" >&2; exit 1; } +get_rc_val() { ${0%/*}/gl-query-rc $1; } + +# ---------- + +# is mirroring even enabled? 
+hn=`get_rc_val GL_HOSTNAME` +[ -z "$hn" ] && exit + +# we should not be invoked directly from the command line +[ -z "$GL_LOG" ] && die fatal: do not run $0 directly + +# ---------- + +# get repo name then check if it's a local or slave (ie we're not the master) +[ -z "$1" ] && die fatal: missing reponame argument +repo=$1; shift + +REPO_BASE=`get_rc_val REPO_BASE` +cd $REPO_BASE/$repo.git 2>/dev/null || die fatal: could not change directory to "$repo" +gmm=`git config --get gitolite.mirror.master` + +# is it local? (remember, empty/undef ==> local +gmm=${gmm:-local} +[ "$gmm" = "local" ] && exit + +# is it a slave? +[ "$hn" = "$gmm" ] || die fatal: wrong master. Try $gmm... + +# ---------- + +# now see if we want to be foregrounded. Fg mode accepts only one slave +[ "$1" = "-fg" ] && { + [ -z "$2" ] && die fatal: missing slavename argument + [ -n "$3" ] && die fatal: too many slavenames + git push --mirror $2:$repo 2>&1 | sed -e "s/^/$hn:/" + exit +} + +# ---------- + +# normal (self-backgrounding) mode, one or more slaves + +[ -z "$1" ] && die fatal: missing list of slaves +export slaves +slaves="$*" + +# ---------- + +# print out the job ID, then redirect all 3 FDs +export job_id=$$ # can change to something else if needed +echo "($job_id&) $hn ==== ($repo) ===>" $slaves >&2 +logfile=${GL_LOG/%.log/-mirror-pushes.log} +exec >>$logfile 2>&1 ' + + for s in $slaves + do + [ "$s" = "$hn" ] && continue # skip ourselves + git push --mirror $s:$repo || echo ==== WARNING: RC=$? 
from git push --mirror $s:$repo ==== + done 2>&1 | sed -e "s/^/ /" + echo `date +%T` '===>' $slaves + echo + ) 2>&1 | sed -e "s/^/$job_id:/" & # background the whole thing +) diff --git a/src/gl-mirror-shell b/src/gl-mirror-shell index e72f4ad..6a0dfde 100755 --- a/src/gl-mirror-shell +++ b/src/gl-mirror-shell @@ -1,30 +1,168 @@ -#!/bin/bash +#!/usr/bin/perl -export GL_BYPASS_UPDATE_HOOK -GL_BYPASS_UPDATE_HOOK=1 +# terminology: +# native repo: a repo for which we are the master; pushes happen here +# authkeys: shorthand for ~/.ssh/authorized_keys -get_rc_val() { - ${0%/*}/gl-query-rc $1 +# this is invoked in one of two ways: + +# (1) locally, from a shell script or command line + +# (2) from a remote server, via authkeys, with one argument (the name of the +# sending server), similar to what happens with normal users and the +# 'gl-auth-command' program. SSH_ORIGINAL_COMMAND will then contain the +# actual command that the remote sent. +# +# Currently, these commands are (a) 'info', (b) 'git-receive-pack' when a +# mirror push is *received* by a slave, (c) 'request-push' sent by a slave +# (possibly via an ADC) when the slave finds itself out of sync, (d) a +# redirected push, from a user pushing to a slave, which is represented not by +# a command per se but by starting with "USER=..." + +use strict; +use warnings; + +# ---------------------------------------------------------------------------- +# this section of code snarfed from gl-auth-command +# XXX add this program to 'that bindir thing' in doc/developer-notes.mkd +BEGIN { + $0 =~ m|^(/)?(.*)/| and $ENV{GL_BINDIR} = ($1 || "$ENV{PWD}/") . 
$2; } -REPO_BASE=$( get_rc_val REPO_BASE) -REPO_UMASK=$(get_rc_val REPO_UMASK) +use lib $ENV{GL_BINDIR}; -umask $REPO_UMASK +use gitolite_rc; +use gitolite_env; +use gitolite; -if echo $SSH_ORIGINAL_COMMAND | egrep git-upload\|git-receive >/dev/null -then +setup_environment(); +die "fatal: GL_HOSTNAME not set in rc; mirroring disabled\n" unless $GL_HOSTNAME; - # the (special) admin post-update hook needs these, so we cheat - export GL_RC - export GL_ADMINDIR - export GL_BINDIR - GL_RC=$( get_rc_val GL_RC) - GL_ADMINDIR=$(get_rc_val GL_ADMINDIR) - GL_BINDIR=$( get_rc_val GL_BINDIR) +# ---------------------------------------------------------------------------- - SSH_ORIGINAL_COMMAND=`echo $SSH_ORIGINAL_COMMAND | sed -e "s:':'$REPO_BASE/:"` - exec git shell -c "$SSH_ORIGINAL_COMMAND" -else - bash -c "cd $REPO_BASE; $SSH_ORIGINAL_COMMAND" -fi +# deal with local invocations first + +# on the "master", run from a shell, for one specific repo, with an optional +# list of slaves, like so: +# gl-mirror-shell request-push some-repo [optional list of slaves/keys] +if ( ($ARGV[0] || '') eq 'request-push' and not $ENV{SSH_ORIGINAL_COMMAND} ) { + shift; + my $repo = shift or die "fatal: missing reponame\n"; + -d "$REPO_BASE/$repo.git" or die "fatal: no such repo?\n"; + + # this is the default argument if no slave list or key is supplied + @ARGV = ('gitolite.mirror.slaves') unless @ARGV; + + my @slaves = (); + my %seen = (); + # each argument in @ARGV is either a slave name, or a gitolite mirroring + # key to be replaced with its value, split into a list of slaves + while (@ARGV) { + $a = shift @ARGV; + if ($a =~ /^gitolite\.mirror\.[\w.-]+$/) { + my @values = split(' ', `git config --file $REPO_BASE/$repo.git/config --get $a` || ''); + unshift @ARGV, @values; + } else { + push @slaves, $a unless $seen{$a}++; + } + } + + exit 1 unless @slaves; + # we don't want to complain louder than that because the most common + # use of this script on the master server is via cron, run 
against + # *all* known repos without checking their individual key values + + print STDERR "info: mirror-push $repo ", join(" ", @slaves), "\n"; + system("gl-mirror-push", $repo, @slaves); + + exit 0; +} + +unless (@ARGV) { print STDERR "fatal: missing command\n"; exit 1; } + +# ---------- + +# now the remote invocations; log it, then get the sender name +my $sender = shift; +$ENV{GL_USER} ||= "host:$sender"; +# default SSH_ORIGINAL_COMMAND is 'info', as usual +$ENV{SSH_ORIGINAL_COMMAND} ||= 'info'; +# and it's too long to bloody type... +my $soc = $ENV{SSH_ORIGINAL_COMMAND}; +log_it(); + +# ---------- + +# our famous 'info' command +if ($soc eq 'info') { + print STDERR "Hello $sender, I am $GL_HOSTNAME\n"; + + exit; +} + +# ---------- + +# when running on the "slave", we have to "receive" the `git push --mirror` +# from a master. Check that the repo is indeed a slave and the sender is the +# correct master before allowing the push. + +if ($soc =~ /^git-receive-pack '(\S+)'$/) { + my $repo = $1; + die "fatal: invalid characters in $repo\n" unless $repo =~ $REPONAME_PATT; + my $mm = mirror_mode($repo); + + # reminder: we're not going through the slave-side gl-auth-command. This + # is a server-to-server transaction, with an authenticated sender. + # Authorisation consists of checking to make sure our config says this + # sender is indeed the master for this repo + die "$ABRT fatal: $GL_HOSTNAME <==//== $sender mirror-push rejected: $repo is $mm\n" unless $mm eq "slave of $sender"; + print STDERR "$GL_HOSTNAME <=== ($repo) ==== $sender\n"; + + $ENV{GL_BYPASS_UPDATE_HOOK} = 1; + # replace the repo path with the full path and hand off to git-shell + $soc =~ s(')('$ENV{GL_REPO_BASE_ABS}/); + exec("git", "shell", "-c", $soc); +} + +# ---------- + +# a slave may have found itself out of sync (perhaps the network was down at +# the time of the last push to the master), and now wants to request a sync. 
+# This is similar to the "local invocation" described above, but we check the +# sender name against gitolite.mirror.slaves to prevent some random slave from +# asking for a repo it should not be having! + +if ($soc =~ /^request-push (\S+)$/) { + my $repo = $1; + die "fatal: invalid characters in $repo\n" unless $repo =~ $REPONAME_PATT; + die "$ABRT fatal: $GL_HOSTNAME ==//==> $sender refused: not in slave list\n" unless mirror_listslaves($repo) =~ /(^|\s)$sender(\s|$)/; + print STDERR "$GL_HOSTNAME ==== ($repo) ===> $sender\n"; + # just one sender, and we've checked that he is "on the list". Foreground... + system("$ENV{GL_BINDIR}/gl-mirror-push", $repo, "-fg", $sender); + + exit; +} + +# ---------- + +# experimental feature... + +# when running on the "master", receive a redirected push from a slave. This +# is disabled by default and needs to be explicitly enabled on both the master +# and the slave. SEE DOCUMENTATION FOR CAVEATS AND CAUTIONS. + +if ($soc =~ /^USER=(\S+) SOC=(git-receive-pack '(\S+)')$/) { + + my $user = $1; + $ENV{SSH_ORIGINAL_COMMAND} = $2; + my $repo = $3; + die "fatal: invalid characters in $user\n" unless $user =~ $USERNAME_PATT; + die "fatal: invalid characters in $repo\n" unless $repo =~ $REPONAME_PATT; + die "$ABRT fatal: $GL_HOSTNAME <==//== $sender redirected push rejected\n" unless mirror_redirectOK($repo, $sender); + print STDERR "$GL_HOSTNAME <=== $user ($repo) ==== $sender\n"; + + my $pgm = $0; + $pgm =~ s([^/]+$)(gl-auth-command); + + exec($pgm, $user); +} diff --git a/src/gl-mirror-sync b/src/gl-mirror-sync deleted file mode 100755 index e946ec0..0000000 --- a/src/gl-mirror-sync +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash - -mirror=$1 -[ -z "$1" ] && { echo need \"user@host\" or ssh hostalias; exit 1; } -ssh -o PasswordAuthentication=no $mirror echo hello-there | grep hello-there >/dev/null || - { echo I cant ssh to $mirror; exit 1; } - -cd $HOME -REPO_BASE=`${0%/*}/gl-query-rc REPO_BASE` -cd $REPO_BASE - -ssh $mirror cat 
\$HOME/.gitolite.rc | expand | egrep '^ *\$GL_SLAVE_MODE *= *1; *$' >/dev/null || { - echo $mirror does not seem to be in slave mode - exit 1; -} - -find . -type d -name "*.git" -prune | cut -c3- | sort | while read r -do - cd $HOME; cd $REPO_BASE; cd $r - printf "$r " - - if [ `git rev-parse HEAD` = "HEAD" ] - then - echo is empty\; skipping - continue - fi - - # this is essentially the same code as in the post-receive hook - if git push --mirror $mirror:$r - then - : - else - ssh $mirror mkdir -p $r - ssh $mirror git init --bare $r - git push --mirror $mirror:$r || - echo "WARNING: mirror push to $mirror failed" - fi < /dev/null -done diff --git a/src/gl-tool b/src/gl-tool index 923eb03..256a47a 100755 --- a/src/gl-tool +++ b/src/gl-tool @@ -1,75 +1,106 @@ -#!/bin/sh +#!/usr/bin/perl -w -# BEGIN USAGE +# help/instructions are at the bottom, in the __DATA__ section -# $0 -- make some server side tasks easier +use strict; +use warnings; -# Usage: -# $0 [sub-command [args]] +use FindBin; +BEGIN { $ENV{GL_BINDIR} = $FindBin::Bin; } -# Security notes: this program does not do any sanitisation of input. You're -# running it at the CLI on the server, so you already have the power to do -# whatever you want anyway. +use lib $ENV{GL_BINDIR}; +use gitolite_rc; +use gitolite; -# current sub-commands: +sub usage { print ; exit 1; } +usage() unless (@ARGV); -# (1) REPLACE THE OLD $SHELL_USERS MECHANISM -# -# $0 shell-add foo.pub -# -# adds the pubkey in foo.pub into the authkeys file with "-s" argument (shell -# access) and user "foo". The line will be added *before* the "# gitolite -# start" section, so that a gitolite-admin push will not affect it. +my $cmd = shift; +my $pub = shift; -# Although there is no "shell-remove" sub-command, you can do that quite -# easily by editing ~/.ssh/authorized_keys and deleting the appropriate line. 
- -# END USAGE - - -die() { echo "$@"; exit 1; } >&2 - -if [ -z "$1" ] -then - perl -ne 's/\$0/$ARGV/ge; print if /BEGIN USAGE/../END USAGE/' $0 | grep -v USAGE | cut -c3- - exit 1 -fi - -if [ "$1" = "shell-add" ] -then +if ($cmd eq 'add-shell-user' or $cmd eq 'add-mirroring-peer') { # sanity checks - [ -z "$2" ] && exec $0 - [ -f "$2" ] || die "$2 does not exist" - wc -l < $2 | grep '^1$' >/dev/null || die "$2 contains more than one line" + $pub or usage(); + my $user = validate_pubkeyfile($pub); + + # write the file out, with the new authkeys line added just *before* the + # gitolite section. But first, set the command that gets invoked + $cmd = ( $cmd eq 'add-shell-user' ? 'gl-auth-command -s' : 'gl-mirror-shell' ); + ak_insert($cmd, $user, $pub); + + exit 0; +} + +die "could not understand command $cmd\n"; + +sub validate_pubkeyfile { + my $pub = shift; + + -f $pub or die "$pub does not exist\n"; + die "$pub contains more than one line\n" if wc_l($pub) > 1; + + my $user = $pub; + $user =~ s(^.*/)(); # remove optional directory + die "file name must end in .pub\n" unless $user =~ /(.*)\.pub$/; + $user = $1; + + return $user; +} + +sub ak_insert { + my ($cmd, $user, $pub) = @_; # must be kept consistent with what's in src/gl-compile-conf; on the plus # side, it's not likely to change anytime soon! - AUTH_OPTIONS="no-port-forwarding,no-X11-forwarding,no-agent-forwarding" + my $AUTH_OPTIONS = "no-port-forwarding,no-X11-forwarding,no-agent-forwarding"; - GL_BINDIR=`${0%/*}/gl-query-rc GL_BINDIR` + my $authline = "command=\"$ENV{GL_BINDIR}/$cmd $user\",$AUTH_OPTIONS " . 
slurp($pub); - pubkey_file=$2 - user=`basename $pubkey_file .pub` + my $authkeys = "$ENV{HOME}/.ssh/authorized_keys"; + my $ak_lines = slurp($authkeys); + $ak_lines =~ s/^.*$cmd $user.*\n//m; # remove existing keyline, if present + $ak_lines =~ s/^# gitolite start/$authline# gitolite start/m; + my $akfh = wrap_open(">", $authkeys); + print $akfh $ak_lines; + close $akfh; +} - authline="command=\"$GL_BINDIR/gl-auth-command -s $user\",$AUTH_OPTIONS `cat $pubkey_file`"; +sub wc_l { + my $fh = wrap_open("<", shift); + my @l = <$fh>; + my $l = @l; + return $l; +} - authkeys=$HOME/.ssh/authorized_keys +__DATA__ - for i in 1 - do - perl -lne "last if /# gitolite start/; print unless /gl-auth-command -s $user/; " $authkeys - echo $authline - perl -lne "print if /# gitolite start/ .. 0; " $authkeys - done > $authkeys.new +gl-tool -- make some server side tasks easier - diff -u $authkeys $authkeys.new && die no change to authkey file - echo - echo If the above diff looks ok, press enter. Else press Ctrl-C. - read dummy - cat $authkeys > $authkeys.old - cat $authkeys.new > $authkeys +Usage: + gl-tool [sub-command [args]] - exit 0 -fi +Security notes: this program does not do any sanitisation of input. You're +running it at the CLI on the server, so you already have the power to do +whatever you want anyway. -die "could not understand command $1" +current sub-commands: + +(1) REPLACE THE OLD $SHELL_USERS MECHANISM + + gl-tool add-shell-user foo.pub + +Adds the pubkey in foo.pub into the authkeys file with "-s" argument (shell +access) and user "foo". The line will be added *before* the "# gitolite +start" section, so that a gitolite-admin push will not affect it. + +Although there is no "remove-shell-user" sub-command, you can do that quite +easily by editing ~/.ssh/authorized_keys and deleting the appropriate line. 
+ +(2) ADD A MIRRORING PEER KEY + + gl-tool add-mirroring-peer git@server.company.com.pub + +As above, but the given key will invoke 'gl-mirror-shell' instead of the +usual 'gl-auth-command'. This is meant to be a server-to-server key, allowing +(in this example), the gitolite server called 'git@server.company.com' to +access this server for mirroring operations. diff --git a/t/out/t01-repo-groups.1 b/t/out/t01-repo-groups.1 index 2124ba6..cce7523 100644 --- a/t/out/t01-repo-groups.1 +++ b/t/out/t01-repo-groups.1 @@ -1,4 +1,4 @@ -$data_version = '1.7'; +$data_version = '2.0'; %repos = ( 'aa' => { 'R' => { diff --git a/t/out/t01-repo-groups.1b b/t/out/t01-repo-groups.1b index 3275bcf..3710cd3 100644 --- a/t/out/t01-repo-groups.1b +++ b/t/out/t01-repo-groups.1b @@ -1,4 +1,4 @@ -$data_version = '1.7'; +$data_version = '2.0'; %repos = ( 'aa' => { 'R' => { diff --git a/t/out/t01-repo-groups.1bs b/t/out/t01-repo-groups.1bs index a606c7a..3600f81 100644 --- a/t/out/t01-repo-groups.1bs +++ b/t/out/t01-repo-groups.1bs @@ -1,4 +1,4 @@ -$data_version = '1.7'; +$data_version = '2.0'; %repos = (); %split_conf = ( 'aa' => 1, diff --git a/t/out/t01-repo-groups.2 b/t/out/t01-repo-groups.2 index 25131c9..f8b6140 100644 --- a/t/out/t01-repo-groups.2 +++ b/t/out/t01-repo-groups.2 @@ -1,4 +1,4 @@ -$data_version = '1.7'; +$data_version = '2.0'; %repos = ( '@g1' => { '@g1' => [ diff --git a/t/out/t02-user-groups.1 b/t/out/t02-user-groups.1 index 05ba1fb..de27571 100644 --- a/t/out/t02-user-groups.1 +++ b/t/out/t02-user-groups.1 @@ -1,4 +1,4 @@ -$data_version = '1.7'; +$data_version = '2.0'; %repos = ( 'aa' => { 'R' => { diff --git a/t/out/t02-user-groups.1b b/t/out/t02-user-groups.1b index 16d4f1b..1f0ba5d 100644 --- a/t/out/t02-user-groups.1b +++ b/t/out/t02-user-groups.1b @@ -1,4 +1,4 @@ -$data_version = '1.7'; +$data_version = '2.0'; %repos = ( 'aa' => { 'R' => { diff --git a/t/out/t02-user-groups.1bs b/t/out/t02-user-groups.1bs index 9a36428..2c8af39 100644 --- 
a/t/out/t02-user-groups.1bs +++ b/t/out/t02-user-groups.1bs @@ -1,4 +1,4 @@ -$data_version = '1.7'; +$data_version = '2.0'; %repos = (); %split_conf = ( 'aa' => 1, diff --git a/t/out/t02-user-groups.2 b/t/out/t02-user-groups.2 index b4d5fd1..5e9a3c3 100644 --- a/t/out/t02-user-groups.2 +++ b/t/out/t02-user-groups.2 @@ -1,4 +1,4 @@ -$data_version = '1.7'; +$data_version = '2.0'; %repos = ( 'aa' => { '@g1' => [ diff --git a/t/out/t02-user-groups.2bs b/t/out/t02-user-groups.2bs index 09e1a33..7264595 100644 --- a/t/out/t02-user-groups.2bs +++ b/t/out/t02-user-groups.2bs @@ -1,4 +1,4 @@ -$data_version = '1.7'; +$data_version = '2.0'; %repos = (); %groups = ( '@g1' => { diff --git a/t/t55-repo-configs-wild-without-CREATOR b/t/t55-repo-configs-wild-without-CREATOR index e3abb13..e49effa 100644 --- a/t/t55-repo-configs-wild-without-CREATOR +++ b/t/t55-repo-configs-wild-without-CREATOR @@ -1,7 +1,7 @@ # vim: syn=sh: for bc in 0 1 do - for gcw in 0 1 + for gcw in 0 do cd $TESTDIR $TESTDIR/rollback || die "rollback failed" @@ -9,7 +9,6 @@ do name INTERNAL editrc GL_WILDREPOS 1 editrc GL_BIG_CONFIG $bc - echo "\$GL_GITCONFIG_WILD = $gcw;" | addrc # ---------- @@ -47,8 +46,6 @@ do RW = @leads config foo.bar = baz " | ugc -r - [ "$gcw" = "0" ] && expect "remote: git config set for bar/..\* but \$GL_GITCONFIG_WILD not set" - [ "$gcw" = "1" ] && notexpect "remote: git config set for bar/..\* but \$GL_GITCONFIG_WILD not set" notexpect "git config.*not allowed" expect_push_ok "master -> master" diff --git a/t/t56-repo-configs-wild-with-CREATOR b/t/t56-repo-configs-wild-with-CREATOR index ca0977e..f6dc949 100644 --- a/t/t56-repo-configs-wild-with-CREATOR +++ b/t/t56-repo-configs-wild-with-CREATOR @@ -1,12 +1,11 @@ # vim: syn=sh: -for gcw in 0 1 +for gcw in 0 do cd $TESTDIR $TESTDIR/rollback || die "rollback failed" name INTERNAL editrc GL_WILDREPOS 1 - echo "\$GL_GITCONFIG_WILD = $gcw;" | addrc # ---------- @@ -42,8 +41,6 @@ do RW = @leads config foo.bar = baz " | ugc -r - [ "$gcw" 
= "0" ] && expect "remote: git config set for bar/\$creator/..\* but \$GL_GITCONFIG_WILD not set" - [ "$gcw" = "1" ] && notexpect "remote: git config set for bar/\$creator/..\* but \$GL_GITCONFIG_WILD not set" notexpect "git config.*not allowed" expect_push_ok "master -> master"