# rep075.tcl

# See the file LICENSE for redistribution information.
#
# Copyright (c) 2001,2008 Oracle.  All rights reserved.
#
# $Id: rep075.tcl,v 12.7 2008/01/08 20:58:53 bostic Exp $
#
# TEST	rep075
# TEST	Replication and prepared transactions.
# TEST	Test having outstanding prepared transactions and simulating
# TEST	crashing or upgrading or downgrading sites.
# TEST
#
proc rep075 { method { tnum "075" } args } {
	#
	# Driver for the prepared-transaction replication test.
	#
	#   method  access method under test; only btree is actually run
	#   tnum    test number used in all progress messages
	#   args    extra arguments, passed through convert_args and then
	#           handed to rep075_sub for the database opens
	#
	# For every log configuration, every "prep" scenario and every
	# resolution ("commit", "abort", "both") rep075_sub is run twice:
	# once with the last argument 1 and once with it 0.
	#

	source ./include.tcl
	if { $is_windows9x_test == 1 } {
		puts "Skipping replication test on Win 9x platform."
		return
	}

	# This test is only valid for btree.  When the caller is just
	# asking which methods are valid, answer and do nothing else.
	if { $checking_valid_methods } {
		set test_methods { btree }
		return $test_methods
	}
	if { [is_btree $method] == 0 } {
		# Use the caller-supplied test number, as every other
		# message in this test does.
		puts "Rep$tnum: Skipping for method $method"
		return
	}

	set args [convert_args $method $args]
	set logsets [create_logsets 2]
	#
	# Swapping the envs is the only thing that should
	# work for:
	#   HP, old Windows: can't open two handles on same env.
	#   in-memory logs: prepared txns don't survive recovery
	#
	global mixed_mode_logging
	if { $is_hp_test == 1  || $is_windows9x_test == 1 ||
	     $mixed_mode_logging > 0 } {
		set prep {swap}
	} else {
		set prep {dbrecover swap resolve recover envrecover}
	}
	set ops {commit abort both}

	# Run the body of the test for each log configuration, each way
	# of resetting replication, and each way of resolving the txns.
	foreach l $logsets {
		foreach p $prep {
			foreach o $ops {
				puts "Rep$tnum ($method $p $o):\
				    Replication and prepared txns."
				puts "Rep$tnum: Master logs are [lindex $l 0]"
				puts "Rep$tnum: Client logs are [lindex $l 1]"
				puts "Rep$tnum: close DBs after prepare"
				rep075_sub $method $tnum $l $p $o 1 $args
				puts "Rep$tnum: close DBs before prepare"
				rep075_sub $method $tnum $l $p $o 0 $args
			}
		}
	}
}

#
# rep075_sub --
#	Run one pass of the prepared-transaction replication test.
#
#	method	access method, passed to convert_method and rep_test
#	tnum	test number used in progress messages and file names
#	logset	two-element list: log type for the master, then the
#		log type used for both clients
#	prep	how replication is reset while the txns are prepared:
#		swap, recover, resolve, envrecover or dbrecover
#	op	how the two prepared txns are resolved: commit, abort,
#		or anything else to pick one commit and one abort
#	after	0 closes the database handles right after the txns are
#		prepared; 1 leaves them open until the reset step
#	largs	extra arguments for the database opens and rep_test
#
#	Sets up a master and two clients, prepares one txn per database
#	on the master, resets replication according to prep, resolves
#	the txns, and then checks on every site in the final envlist
#	that each key is present exactly when its txn was committed.
#
proc rep075_sub { method tnum logset prep op after largs } {
	global testdir
	global rep_verbose
	global verbose_type
	global util_path

	# Optional replication verbosity for every env opened below.
	set verbargs ""
	if { $rep_verbose == 1 } {
		set verbargs " -verbose {$verbose_type on} "
	}
	env_cleanup $testdir

	replsetup $testdir/MSGQUEUEDIR

	# One home directory per site: a master and two clients.
	set masterdir $testdir/MASTERDIR
	set clientdir $testdir/CLIENTDIR
	set clientdir2 $testdir/CLIENTDIR2
	file mkdir $masterdir
	file mkdir $clientdir
	file mkdir $clientdir2

        # Log size is small so we quickly create more than one.
	# The documentation says that the log file must be at least
	# four times the size of the in-memory log buffer.
	set pagesize 4096
	append largs " -pagesize $pagesize "
	set log_buf [expr $pagesize * 2]
	set log_max [expr $log_buf * 4]
	# NOTE(review): the next two values are overwritten by the
	# adjust_logargs calls below before they are ever read, so the
	# -log_buffer setting built here never reaches the environments.
	# Confirm whether that is intended.
	set m_logargs " -log_buffer $log_buf "
	set c_logargs " -log_buffer $log_buf "

	set m_logtype [lindex $logset 0]
	set c_logtype [lindex $logset 1]

	# In-memory logs require a large log buffer, and cannot
	# be used with -txn nosync.
	set m_logargs [adjust_logargs $m_logtype]
	set c_logargs [adjust_logargs $c_logtype]
	set m_txnargs [adjust_txnargs $m_logtype]
	set c_txnargs [adjust_txnargs $c_logtype]

	# Open a master.
	# The env command is kept as a string, without the role flag,
	# so that the recovery scenarios below can re-run it with
	# different options.
	repladd 1
	set ma_envcmd "berkdb_env_noerr -create $m_txnargs \
	    $m_logargs -errpfx ENV0 -log_max $log_max $verbargs \
	    -home $masterdir -rep_transport \[list 1 replsend\]"
	set env0 [eval $ma_envcmd -rep_master]
	set masterenv $env0
	error_check_good master_env [is_valid_env $env0] TRUE

	# Open a client.
	repladd 2
	set cl_envcmd "berkdb_env_noerr -create $c_txnargs \
	    $c_logargs -errpfx ENV1 -log_max $log_max $verbargs \
	    -home $clientdir -rep_transport \[list 2 replsend\]"
	set env1 [eval $cl_envcmd -rep_client]
	set clientenv $env1
	error_check_good client_env [is_valid_env $env1] TRUE

	# Open a second client.  None of the scenarios below reopens
	# or recovers this one; it keeps its original handle throughout.
	repladd 3
	set cl2_envcmd "berkdb_env_noerr -create $c_txnargs \
	    $c_logargs -errpfx ENV2 -log_max $log_max $verbargs \
	    -home $clientdir2 -rep_transport \[list 3 replsend\]"
	set env2 [eval $cl2_envcmd -rep_client]
	set clientenv2 $env2
	error_check_good client_env [is_valid_env $env2] TRUE

	set omethod [convert_method $method]

	# Bring the clients online by processing the startup messages.
	set envlist "{$env0 1} {$env1 2} {$env2 3}"
	process_msgs $envlist

	#
	# Run rep_test in a database with a sub database.
	#
	set testfile "test$tnum.db"
	set sub "subdb"
	set db1 [eval {berkdb_open_noerr -env $masterenv -auto_commit \
	    -create -mode 0644} $largs $omethod $testfile $sub]
	error_check_good dbopen [is_valid_db $db1] TRUE

	puts "\tRep$tnum.a: Running rep_test in replicated env."
	set niter 1
	eval rep_test $method $masterenv $db1 $niter 0 0 0 0 $largs
	process_msgs $envlist

	# Second database, in its own file and without a subdatabase.
	set testfile2 "test$tnum.2.db"
	set db [eval {berkdb_open_noerr -env $masterenv -auto_commit \
	    -create -mode 0644} $largs $omethod $testfile2]
	error_check_good dbopen [is_valid_db $db] TRUE

	#
	# Create and prepare 2 transactions:
	# One txn is for the first database and one txn for the
	# second database.  We want to test that we can detect
	# when the last restored txn has been resolved.  And we
	# want to test various files being open.
	#
	puts "\tRep$tnum.b: Prepare some txns."
	# Number of prepared-but-not-yet-committed txns that
	# txn_recover is expected to report later.
	set pbnyc 2
	set key key
	set data some_data
	set txn1 [$masterenv txn]
	error_check_good txn [is_valid_txn $txn1 $masterenv] TRUE
	error_check_good put [$db1 put -txn $txn1 $key $data] 0

	# The gid is derived from the txn handle name.  The variable is
	# reused for the second txn, so only the last gid is kept.
	set gid [make_gid rep075:$txn1]
	error_check_good commit [$txn1 prepare $gid] 0

	set txn2 [$masterenv txn]
	error_check_good txn [is_valid_txn $txn2 $masterenv] TRUE
	error_check_good put [$db put -txn $txn2 $key $data] 0

	set gid [make_gid rep075:$txn2]
	error_check_good commit [$txn2 prepare $gid] 0
	# With after == 0 the database handles are closed here, while
	# the txns are still only prepared.  Otherwise they stay open
	# until the reset step below.
	if { $after == 0 } {
		$db1 close
		$db close
	}
	process_msgs $envlist

	#
	# Now we have txns on a master that are PBNYC (prepared but
	# not yet committed).  Alter the replication system now
	# based on what we're testing this time through.
	#
	puts "\tRep$tnum.c: Reset replication ($prep)."

	# Decide how each txn is resolved: op1 applies to txn1 (first
	# database) and op2 to txn2 (second database).  For any op
	# other than commit or abort, one txn is committed and the
	# other aborted, with the assignment chosen at random.
	if { $op == "commit" } {
		set op1 commit
		set op2 commit
	} elseif { $op == "abort" } {
		set op1 abort
		set op2 abort
	} else {
		set i [berkdb random_int 0 1]
		if { $i == 0 } {
			set op1 commit
			set op2 abort
		} else {
			set op1 abort
			set op2 commit
		}
	}
	# NOTE(review): rep075_resolve pairs this list positionally with
	# the list returned by txn_recover, and the final verification
	# checks op1 against the first database.  That presumably relies
	# on txn_recover returning the txns in the order they were
	# prepared -- confirm.
	set oplist [list $op1 $op2]
	#
	# If we are doing a swap, swap roles between master and client
	# and then call txn recover.  Master should then commit.
	# This operation tests handling prepared txns in replication code.
	#
	# If we are doing a recover, each site stops using its old
	# env handle and then opens a new one, with recovery.
	# This operation tests handling prepared txns and then
	# starting replication.
	#
	# If we are doing an envrecover, each site stops using its old
	# env handle and then opens a new one, with recovery.
	# Each site then opens a 2nd dbenv handle to run txn_recover
	# and resolve each operation.
	# This operation tests handling prepared txns and then
	# starting replication.
	#
	# If we are doing a resolve, each site prepares the txns
	# and then resolves the txns and then stops using the old
	# env handle to cause a "crash".  We then open a new one
	# with recovery.  This operation tests handling prepared
	# txns and having them resolved.
	#
	# A dbrecover shares the envrecover code path, but first runs
	# the stand-alone db_recover utility on the master and the
	# first client's home directories.
	#
	if { $prep == "swap" } {
		puts "\tRep$tnum.c.0: Swap roles master->client."
		#
		# A downgrading master must resolve the txns.  So, commit
		# them here, but don't send the messages to the client that
		# is about to become master.
		#
		error_check_good commit [$txn1 commit] 0
		error_check_good commit [$txn2 commit] 0
		if { $after == 1 } {
			$db1 close
			$db close
		}
		# Drop the queued messages for both clients so that
		# neither of them sees the commits just performed.
		replclear 2
		replclear 3
		set newclient $env0
		error_check_good downgrade [$newclient rep_start -client] 0
		set ctxnlist [$newclient txn_recover]
		set newmaster $env1
		puts "\tRep$tnum.c.1: Swap roles client->master."
		error_check_good upgrade [$newmaster rep_start -master] 0
		set txnlist [$newmaster txn_recover]

		# The new master must still see both prepared txns; the
		# old master, which resolved them, must see none.
		puts "\tRep$tnum.c.2: Check status of prepared txn."
		error_check_good txnlist_len [llength $txnlist] $pbnyc
		error_check_good txnlist_len [llength $ctxnlist] 0

		#
		# Now resolve the old prepared txns on the new master,
		# according to oplist.
		#
		puts "\tRep$tnum.c.3: Resolve prepared txn ($op)."
		rep075_resolve $txnlist $oplist
	} elseif { $prep == "recover" } {
		#
		# To simulate a crash, simply stop using the old handles
		# and reopen new ones, with recovery.  First flush both
		# the log and mpool to disk.
		#
		set origenv0 $env0
		set origenv1 $env1
		set origtxn1 $txn1
		set origtxn2 $txn2
		puts "\tRep$tnum.c.0: Sync and recover master environment."
		error_check_good flush1 [$env0 log_flush] 0
		error_check_good sync1 [$env0 mpool_sync] 0
		if { $after == 1 } {
			$db1 close
			$db close
		}
		# Reopen with recovery and without a replication role;
		# rep_start is called explicitly further down.
		set env0 [eval $ma_envcmd -recover]
		error_check_good master_env [is_valid_env $env0] TRUE
		puts "\tRep$tnum.c.1: Run txn_recover on master env."
		set txnlist [$env0 txn_recover]
		error_check_good txnlist_len [llength $txnlist] $pbnyc
		puts "\tRep$tnum.c.2: Resolve txn ($op) on master env."
		rep075_resolve $txnlist $oplist

		puts "\tRep$tnum.c.3: Sync and recover client environment."
		error_check_good flush1 [$env1 log_flush] 0
		error_check_good sync1 [$env1 mpool_sync] 0
		set env1 [eval $cl_envcmd -recover]
		error_check_good client_env [is_valid_env $env1] TRUE
		puts "\tRep$tnum.c.4: Run txn_recover on client env."
		set txnlist [$env1 txn_recover]
		error_check_good txnlist_len [llength $txnlist] $pbnyc

		puts "\tRep$tnum.c.5: Resolve txn ($op) on client env."
		rep075_resolve $txnlist $oplist

		puts "\tRep$tnum.c.6: Restart replication on both envs."
		error_check_good master [$env0 rep_start -master] 0
		error_check_good client [$env1 rep_start -client] 0
		set newmaster $env0
		# Rebuild the env list around the new handles.
		set envlist "{$env0 1} {$env1 2} {$env2 3}"
		#
		# Clean up old Tcl handles.  Failures are deliberately
		# ignored.
		#
		catch {$origenv0 close} res
		catch {$origenv1 close} res
		catch {$origtxn1 close} res
		catch {$origtxn2 close} res
	} elseif { $prep == "resolve" } {
		#
		# Check having prepared txns in the log, but they are
		# also resolved before we "crash".
		# To simulate a crash, simply stop using the old handles
		# and reopen new ones, with recovery.  First flush both
		# the log and mpool to disk.
		#
		set origenv0 $env0
		set origenv1 $env1
		set origdb1 $db1
		set origdb $db
		puts "\tRep$tnum.c.0: Resolve ($op1 $op2) and recover master."
		error_check_good resolve1 [$txn1 $op1] 0
		error_check_good resolve2 [$txn2 $op2] 0
		error_check_good flush0 [$env0 log_flush] 0
		error_check_good sync0 [$env0 mpool_sync] 0
		# Deliver the resolutions to the clients before "crashing".
		process_msgs $envlist
		set env0 [eval $ma_envcmd -recover]
		error_check_good master_env [is_valid_env $env0] TRUE
		puts "\tRep$tnum.c.1: Run txn_recover on master env."
		# Everything was resolved, so nothing should be restored.
		set txnlist [$env0 txn_recover]
		error_check_good txnlist_len [llength $txnlist] 0

		puts "\tRep$tnum.c.2: Sync and recover client environment."
		error_check_good flush1 [$env1 log_flush] 0
		error_check_good sync1 [$env1 mpool_sync] 0
		set env1 [eval $cl_envcmd -recover]
		error_check_good client_env [is_valid_env $env1] TRUE
		puts "\tRep$tnum.c.3: Run txn_recover on client env."
		set txnlist [$env1 txn_recover]
		error_check_good txnlist_len [llength $txnlist] 0

		puts "\tRep$tnum.c.4: Restart replication on both envs."
		error_check_good master [$env0 rep_start -master] 0
		error_check_good client [$env1 rep_start -client] 0
		set newmaster $env0
		set envlist "{$env0 1} {$env1 2} {$env2 3}"
		# Clean up old Tcl handles.  The database handles were
		# already closed earlier when after == 0, so failures
		# here are deliberately ignored.
		catch {$origenv0 close} res
		catch {$origenv1 close} res
		catch {$origdb close} res
		catch {$origdb1 close} res
	} elseif { $prep == "envrecover" || $prep == "dbrecover" } {
		#
		# To simulate a crash, simply stop using the old handles
		# and reopen new ones, with recovery.  First flush both
		# the log and mpool to disk.
		#
		set origenv0 $env0
		set origenv1 $env1
		set origtxn1 $txn1
		set origtxn2 $txn2
		puts "\tRep$tnum.c.0: Sync and recover master environment."
		error_check_good flush1 [$env0 log_flush] 0
		error_check_good sync1 [$env0 mpool_sync] 0
		# Remember the generation so that we can check below that
		# restarting the master changes it.
		set oldgen [stat_field $env0 rep_stat "Generation number"]
		error_check_good flush1 [$env1 log_flush] 0
		error_check_good sync1 [$env1 mpool_sync] 0
		if { $after == 1 } {
			$db1 close
			$db close
		}
		if { $prep == "dbrecover" } {
			# Run the stand-alone db_recover utility on the
			# master home and then on the client home.
			set recargs "-h $masterdir -c "
			set stat [catch {eval exec $util_path/db_recover \
			    -e $recargs} result]
			if { $stat == 1 } {
				error "FAIL: Recovery error: $result."
			}
			set recargs "-h $clientdir -c "
			set stat [catch {eval exec $util_path/db_recover \
			    -e $recargs} result]
			if { $stat == 1 } {
				error "FAIL: Recovery error: $result."
			}
		}
		#
		# !!!
		# We still need to open with recovery, even if 'dbrecover'
		# because db_recover cannot open the env with replication
		# enabled.  But db_recover will be the real recovery that
		# needs to deal with the prepared txn.  This recovery below
		# for db_recover, should be a no-op essentially.
		#
		set recenv0 [eval $ma_envcmd -recover]
		error_check_good master_env [is_valid_env $recenv0] TRUE
		puts "\tRep$tnum.c.1: Run txn_recover on master env."
		# Second handle on the same home, opened without recovery;
		# the txns are recovered and resolved through this one
		# while the recovery handle is still open.
		set env0 [eval $ma_envcmd]
		error_check_good master_env [is_valid_env $env0] TRUE
		set txnlist [$env0 txn_recover]
		error_check_good txnlist_len [llength $txnlist] $pbnyc
		puts "\tRep$tnum.c.2: Resolve txn ($op) on master env."
		rep075_resolve $txnlist $oplist
		error_check_good recenv0_close [$recenv0 close] 0

		# Same two-handle sequence on the first client.  The extra
		# -errpfx arguments are appended after the ENV1 prefix
		# already present in cl_envcmd.
		puts "\tRep$tnum.c.3: Recover client environment."
		set recenv1 [eval $cl_envcmd -recover -errpfx "ENV1REC"]
		error_check_good client_env [is_valid_env $recenv1] TRUE
		puts "\tRep$tnum.c.4: Run txn_recover on client env."
		set env1 [eval $cl_envcmd -errpfx "ENV1NEW"]
		error_check_good client_env [is_valid_env $env1] TRUE
		set txnlist [$env1 txn_recover]
		error_check_good txnlist_len [llength $txnlist] $pbnyc

		puts "\tRep$tnum.c.5: Resolve txns ($oplist) on client env."
		rep075_resolve $txnlist $oplist
		error_check_good recenv1_close [$recenv1 close] 0

		puts "\tRep$tnum.c.6: Restart replication on both envs."
		if { $prep == "dbrecover" } {
			#
			# XXX Since we ran db_recover, we lost the rep gen
			# and clientenv2 cannot detect the change.  Until
			# SR 15396 is fixed, we'll fake it by becoming
			# master, downgrading and then upgrading again to
			# advance the generation number.
			#
			error_check_good master [$env0 rep_start -master] 0
			error_check_good master [$env0 rep_start -client] 0
			replclear 2
			replclear 3
		}
		error_check_good master [$env0 rep_start -master] 0
		# The generation must differ from the one recorded before
		# the simulated crash.
		set gen [stat_field $env0 rep_stat "Generation number"]
		error_check_bad gen $gen $oldgen
		error_check_good client [$env1 rep_start -client] 0
		set newmaster $env0
		set envlist "{$env0 1} {$env1 2} {$env2 3}"
		process_msgs $envlist
		#
		# Clean up old Tcl handles.  Failures are deliberately
		# ignored.
		#
		catch {$origenv0 close} res
		catch {$origenv1 close} res
		catch {$origtxn1 close} res
		catch {$origtxn2 close} res
	}
	#
	# Run a standard rep_test creating test.db now.
	# It runs against whichever env is the master after the reset
	# above, and the resulting messages are delivered to all sites.
	#
	eval rep_test $method $newmaster NULL $niter 0 0 0 0 $largs
	process_msgs $envlist

	#
	# Verify whether or not the key exists in the databases both
	# on the client and the master.
	#
	puts "\tRep$tnum.d: Verify prepared data."
	foreach e $envlist {
		# Each envlist entry is an env handle followed by its
		# site id; only the handle is needed here.
		set env [lindex $e 0]
		set db1 [eval {berkdb_open_noerr -env $env -auto_commit \
		    -create -mode 0644} $largs $omethod $testfile $sub]
		error_check_good dbopen [is_valid_db $db1] TRUE
		set db2 [eval {berkdb_open_noerr -env $env -auto_commit \
		    -create -mode 0644} $largs $omethod $testfile2]
		error_check_good dbopen [is_valid_db $db2] TRUE
		set k1 [$db1 get $key]
		set k2 [$db2 get $key]
		# A committed txn must have left exactly one record for
		# the key; an aborted one must have left none.
		if { $op1 == "commit" } {
			error_check_good key [llength $k1] 1
		} else {
			error_check_good key [llength $k1] 0
		}
		if { $op2 == "commit" } {
			error_check_good key [llength $k2] 1
		} else {
			error_check_good key [llength $k2] 0
		}

		error_check_good db_close [$db1 close] 0
		error_check_good db_close [$db2 close] 0
	}
	# Close the current handle for each of the three sites.
	error_check_good env0_close [$env0 close] 0
	error_check_good env1_close [$env1 close] 0
	error_check_good env2_close [$env2 close] 0

	replclose $testdir/MSGQUEUEDIR
	return
}

proc rep075_resolve { txnlist ops } {
	#
	# Resolve a list of recovered txns, pairing them by position
	# with a list of operations.
	#
	#   txnlist  list of records whose first element is a txn handle
	#   ops      list of txn subcommands, one per record
	#
	# The two lists are checked to have the same length, and every
	# operation is checked to return 0.
	#
	set ntxns [llength $txnlist]
	set nops [llength $ops]
	error_check_good resolve_lists $ntxns $nops

	# Walk both lists by index up to the longer length; a missing
	# entry on either side reads as an empty string.
	set count [expr {$ntxns > $nops ? $ntxns : $nops}]
	for { set i 0 } { $i < $count } { incr i } {
		set handle [lindex [lindex $txnlist $i] 0]
		set action [lindex $ops $i]
		error_check_good commit [$handle $action] 0
	}
}