User Tools

Site Tools


application_specific_backups:subversion_repositories_python

Subversion repositories (using python)

This is a Python version of the script on the “Subversion repositories” page and does basically the same thing. It dumps subversion repositories into files in a specified folder (which must not be within a repository folder) and writes the backup's revision number to a second file. If there is a full backup of a repository, a differential (not an incremental!) backup is possible. It requires Python 3.

Usage: python svn_backup.py {Base|Full|Incremental|Differential|Since} dumpDir repositories [repositories …]

Base and Full will do a full backup, possibly overwriting the old full backup. Incremental, Differential and Since will all do differential backups relative to the existing full backup; if there is no existing full backup, one will be created. It is not possible to specify revision numbers; the latest revision is always backed up. dumpDir is the directory where the svn dumps will be placed and which will be backed up by Bacula. repositories are the repository folders which will be dumped. Filename globbing is done, i.e. you can specify something like “D:\subversion_reps\*”. The script works on Windows clients and should work on Linux, too (not tested). There is no file compression; use the Bacula file compression if you need it. You need to have the python executable in your PATH on your client machine, otherwise the Command in the RunScript section will not work. If python is not in your PATH, create a batch file on the client machine which runs python and svn_backup.py and insert the path to the batch file into the Command line. If your subversion binaries are not in the client machine's PATH, put the path to your subversion binaries in the subversion_path variable.

Any corrections, suggestions, enhancements are welcome!

Insert this into your Job definition:

RunScript {
	Runs On Client = yes
	Runs When = before
	Fail Job On Error = yes
	Command = "python \"C:/path/to/svn_backup.py\" %l \"C:/path/to/dumpDir\" \"C:/path/to/repository1\" \"C:/path/to/repositories/*\""
}

svn_backup.py:

# svn_backup.py   Program to automatically dump subversion repositories to a
#                 folder in order to ease backup by sophisticated backup
#                 software like bacula and others.
# Copyright (C) Tilman Schröder, tilman.schroeder@gmx.de, 19.02.2012

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>

#!/usr/bin/env python3
# -*- coding: utf8 -*-

import os
import io
import subprocess
import argparse
import glob
import errno

subversion_path=""

def directory(dir):
	"""argparse type callback: accept dir only if it is an existing directory.

	Returns dir unchanged when os.path.isdir(dir) is true, otherwise raises
	argparse.ArgumentTypeError so that argparse reports a usage error.
	"""
	if not os.path.isdir(dir):
		raise argparse.ArgumentTypeError(dir+' is not a directory!')
	return dir
# end directory

def complete_pathname_split(path):
	"""Split path into a list of all its components using os.path.split.

	os.path.split is applied repeatedly to the head until its result no
	longer changes. The returned list starts with the head that is left over
	at that point, followed by every tail that was split off, in path order.
	Because the loop only stops after one unchanged split, the list contains
	an empty string right after the leftover head, e.g. on POSIX
	'a/b' gives ['', '', 'a', 'b'] and '/a/b' gives ['/', '', 'a', 'b'].
	"""
	components=[]
	seen=None
	current=os.path.split(path)

	# collect the tails back to front and reverse once at the end
	while current != seen:
		head,tail=current
		components.append(tail)
		seen=current
		current=os.path.split(head)

	components.append(current[0])
	components.reverse()
	return components
# end complete_pathname_split

def _require(condition,message):
	"""Raise AssertionError(message) unless condition is true.

	Used instead of the assert statement: "python -O" removes assert
	statements together with any side effect inside the asserted expression.
	"""
	if not condition:
		raise AssertionError(message)
# end _require

def _dump_revisions(repository,revisions,dump_path,svnenv,incremental):
	"""Write the "svnadmin dump" output for the revision range to dump_path."""
	command=['svnadmin','dump','--quiet',repository,'--revision',revisions]
	if incremental:
		command.append('--incremental')
	# the with statement closes the dump file even if svnadmin cannot be started
	with open(dump_path,'wb') as dump_file:
		dump_subproc=subprocess.Popen(command,stdout=dump_file,stderr=subprocess.PIPE,env=svnenv)
		dump_output=dump_subproc.communicate() # wait for process to finish
	if dump_subproc.returncode != 0:
		# errors='replace': a badly encoded message must not hide the real failure
		raise AssertionError('stderr: '+str(dump_output[1],'utf8','replace'))
# end _dump_revisions

def backup_repository(level,dumpDir,repository):
	"""Dump one subversion repository into files in dumpDir.

	level      -- 'Base' or 'Full' force a full dump; 'Incremental',
	              'Differential' and 'Since' dump the revisions committed
	              since the last full dump. If no full dump exists yet, a
	              full dump is made regardless of level.
	dumpDir    -- existing, readable and writable directory outside of the
	              repository directory.
	repository -- path of the repository directory.

	Files written to dumpDir (<name> is the basename of the repository):
	  <name>.full.svndump  full dump, revisions 0 to youngest
	  <name>.lastFullDump  youngest revision contained in the full dump
	  <name>.inc.svndump   revisions after the full dump up to youngest

	Returns 0.

	Raises AssertionError if an argument check fails or svnlook/svnadmin
	exits with an error, RuntimeError if the full dump exists but its
	revision file is missing, and OSError for other file errors.
	"""
	_require(isinstance(level,str),'level must be a string')
	_require(level in ['Base','Full','Incremental','Differential','Since'],'unknown backup level')

	_require(isinstance(dumpDir,str),'dumpDir must be a string')
	_require(os.path.isdir(dumpDir),'dumpDir is not a directory!')
	_require(os.access(dumpDir,os.F_OK|os.R_OK|os.W_OK),'dumpDir is not readable and writable!')

	_require(isinstance(repository,str),'repository must be a string')

	# this is needed to remove trailing slashes!
	# otherwise os.path.basename could return an empty string ...
	repository = os.path.realpath(repository)

	_require(os.path.isdir(repository),'repository is not a directory!')
	_require(os.access(repository,os.F_OK|os.R_OK|os.W_OK),'repository is not readable and writable!')

	_require(isinstance(subversion_path,str),'subversion_path must be a string')

	# check whether dumpDir is outside of repository: the relative path from
	# the repository to dumpDir must then contain at least one '..'
	try:
		#                             to      start
		relative=os.path.relpath(dumpDir,repository)
	except ValueError:
		# Windows: the paths are on different drives, so dumpDir is outside
		pass
	else:
		_require('..' in complete_pathname_split(relative),'dumpDir may not be in repository dir!')

	# svnenv is os.environ itself (not a copy), so setting PATH below changes
	# the PATH of this process as well as the one handed to the subprocesses
	svnenv=os.environ
	_require('PATH' in svnenv,'PATH is not set in the environment!')
	if len(subversion_path) > 0:
		_require(os.path.isdir(subversion_path),'subversion_path is not a directory!')
		svnenv['PATH']=subversion_path

	youngest_subproc=subprocess.Popen(['svnlook','youngest',repository],stdout=subprocess.PIPE,stderr=subprocess.PIPE,env=svnenv)
	youngest_out,youngest_err=youngest_subproc.communicate() # byte strings
	if youngest_subproc.returncode != 0:
		raise AssertionError('stdout: '+str(youngest_out,'utf8','replace')+'stderr: '+str(youngest_err,'utf8','replace'))
	youngest=int(str(youngest_out,encoding='utf8').strip()) # make sure string is numeric/integer

	repository_name=os.path.basename(repository)
	_require(len(repository_name) > 0,'could not determine the repository name!')

	full_dump_path=os.path.join(dumpDir,repository_name+'.full.svndump')
	last_rev_path=os.path.join(dumpDir,repository_name+'.lastFullDump')
	inc_dump_path=os.path.join(dumpDir,repository_name+'.inc.svndump')

	if level in ('Base','Full') or not os.path.isfile(full_dump_path):
		print('Full dump of '+repository_name+' (rev. '+str(youngest)+')')
		# dump revisions 0 to youngest; a single revision number would make
		# svnadmin dump only that one revision tree, without its history.
		# the upper bound is given explicitly in case a commit completed
		# between getting the last revision (-> youngest) and this dump
		_dump_revisions(repository,'0:'+str(youngest),full_dump_path,svnenv,False)
		# write backed up revision to file
		with open(last_rev_path,'wt',encoding='utf-8') as repo_rev:
			written=repo_rev.write(str(youngest))
		_require(written == len(str(youngest)),'could not write the revision number!')
		# the old differential dump refers to the previous full dump
		try:
			os.remove(inc_dump_path)
		except OSError as e: # do nothing if file was not found
			if e.errno != errno.ENOENT: # No such file or directory
				raise
	else: # incremental backup
		try:
			last_fp=open(last_rev_path,'rt',encoding='utf-8')
		except IOError as e:
			if e.errno == errno.ENOENT: # No such file or directory
				raise RuntimeError('Incremental backup requested, but no last full backup found!') from e
			raise # unknown error
		with last_fp:
			last=int(last_fp.readline().strip())
		if last < youngest:
			oldest=last+1
			print('Incremental dump of '+repository_name+' (rev. '+str(oldest)+'-'+str(youngest)+')')
			# dump revision oldest-youngest. it is necessary to supply the revision number if a commit completed between
			# getting the last revision (-> youngest) and this dump
			_dump_revisions(repository,str(oldest)+':'+str(youngest),inc_dump_path,svnenv,True)
		else:
			print('No changes in '+repository_name+' since last backup. Revision: '+str(youngest))
	return(0)
# end backup_repository

# Command line entry point: parse the arguments, expand wildcards in the
# repository arguments and back up every repository found.
if __name__ == '__main__':
	print('svn_backup.py Copyright (C) Tilman Schröder, tilman.schroeder@gmx.de, 19.02.2012')
	print('This program is free software: you can redistribute it and/or modify')
	print('it under the terms of the GNU General Public License as published by')
	print('the Free Software Foundation, either version 3 of the License, or')
	print('(at your option) any later version.')
	parser = argparse.ArgumentParser(description='Backup subversion repositories to files.')
	parser.add_argument('level',type=str,choices=['Base','Full','Incremental','Differential','Since'],help='Level of backup: Base|Full|Incremental|Differential|Since')
	parser.add_argument('dumpDir',type=directory,help='Directory where the backups will be stored.')
	parser.add_argument('repositories',type=str,nargs='+',action='store',help='Paths to repositories. At least one is required.')
	arguments=parser.parse_args()
	
	# Base is just another name for a full backup
	if arguments.level == 'Base':
		arguments.level = 'Full'
	
	# expand the wildcards here, the script may be started without a shell
	repositories=[]
	for pattern in arguments.repositories:
		matches=glob.glob(pattern)
		if not matches:
			# a path or pattern that matches nothing must fail the job
			# instead of silently backing up nothing
			parser.error('no repository found for '+pattern)
		repositories.extend(matches)
	
	for repository in repositories:
		returnvalue=backup_repository(arguments.level,arguments.dumpDir,repository)
		if returnvalue != 0:
			raise SystemExit(returnvalue)
	
	# SystemExit instead of exit(): exit() is only provided by the site module
	raise SystemExit(0)
# end main
application_specific_backups/subversion_repositories_python.txt · Last modified: 2012/02/19 00:17 by procyon