# See the file LICENSE for redistribution information.
#
# Copyright (c) 2001-2002
#	Sleepycat Software.  All rights reserved.
#
# $Id: reputils.tcl,v 1.1.1.1 2003/02/15 04:56:16 zarzycki Exp $
#
# Replication testing utilities

# Layout of a replication test directory
# --------------------------------------
# Every replication site gets a message queue and a directory holding its
# environment.  Messages destined for a site are appended to that site's
# queue; the site's environment lives in its directory:
#
#                              $testdir
#                  _______________|______________________________
#                 /               |            \                 \
#           MSGQUEUEDIR       MASTERDIR     CLIENTDIR.0 ... CLIENTDIR.N-1
#                |
#          1 2 .. N+1
#
# Inside MSGQUEUEDIR the master is machine id 1, and client i (counting
# from 0) is machine id i+2.
#
# The global array repenv holds the environment handles opened by
# repl_envsetup: repenv(master) is the master, repenv(0)..repenv(N-1) are
# the clients, and repenv(N) holds the string NULL as an end marker.

# Handle of the environment that contains the message-queue databases.
global queueenv

# queuedbs: array of message-queue DB handles, indexed by machine id.
# machids:  list of the machine ids currently registered.
global queuedbs
global machids

# Election timeout value, and the initial state of the message-dropping
# switch (0 means messages are not being dropped).
global elect_timeout
set elect_timeout 50000000
set drop 0

# repl_envsetup --
#	Build the directory tree for a replication test, then open the
#	master environment and nclients client environments, recording
#	the handles in the global array repenv.
#
#	envargs  - extra arguments spliced into every berkdb_env command
#	largs    - argument string to which " -env <masterenv> " is appended
#	tnum     - test number, passed on to repl_envprocq
#	nclients - number of client environments to create
#	droppct  - when nonzero, enables message dropping; drop_msg is set
#	           to 100 / droppct
#	oob      - passed on to repl_envprocq (out-of-order processing)
#
#	Returns largs with the master's "-env" argument appended.
proc repl_envsetup { envargs largs tnum {nclients 1} {droppct 0} { oob 0 } } {
	source ./include.tcl
	global clientdir drop drop_msg masterdir repenv testdir

	# Start from a clean test directory and a fresh message queue env.
	env_cleanup $testdir
	replsetup $testdir/MSGQUEUEDIR

	set masterdir $testdir/MASTERDIR
	file mkdir $masterdir

	# Configure message dropping for replsend.
	if { $droppct == 0 } {
		set drop 0
	} else {
		set drop 1
		set drop_msg [expr {100 / $droppct}]
	}

	# Create one directory per client.
	for { set c 0 } { $c < $nclients } { incr c } {
		set clientdir($c) $testdir/CLIENTDIR.$c
		file mkdir $clientdir($c)
	}

	# The master is always machine id 1.
	repladd 1

	# Use a log file size below the default so that log files roll
	# over, while still leaving room for the tests that use binary
	# files as keys/data.
	set logmax [expr {3 * 1024 * 1024}]
	set masterenv [eval {berkdb_env -create -log_max $logmax} $envargs \
	    {-home $masterdir -txn -rep_master -rep_transport \
	    [list 1 replsend]}]
	error_check_good master_env [is_valid_env $masterenv] TRUE
	set repenv(master) $masterenv

	# Client c is machine id c+2.
	for { set c 0 } { $c < $nclients } { incr c } {
		set machid [expr {$c + 2}]
		repladd $machid
		set cenv [eval {berkdb_env -create} $envargs -txn \
		    {-cachesize { 0 10000000 0 }} -lock_max 10000 \
		    {-home $clientdir($c) -rep_client -rep_transport \
		    [list $machid replsend]}]
		error_check_good client_env [is_valid_env $cenv] TRUE
		set repenv($c) $cenv
	}
	# Mark the end of the client handles.
	set repenv($c) NULL
	append largs " -env $masterenv "

	# Let the sites exchange their startup messages.
	repl_envprocq $tnum $nclients $oob

	return $largs
}

# Drain every site's message queue, repeating until a full pass over all
# queues processes nothing, so that every message exchange is captured.
# The requested number of clients is checked against the number of client
# environments recorded in repenv.  The oob parameter selects out-of-order
# delivery.  replprocessqueue does the per-queue work; this routine only
# does the setup and iterates over the queues.
# repl_envprocq --
#	tnum     - test number, used only for the "ReplNNN" message prefix
#	nclients - expected number of clients; must equal the number of
#	           client handles found in repenv before the NULL marker
#	oob      - when true, each queue is processed with a random skip
#	           interval so messages are handled out of order
proc repl_envprocq { tnum { nclients 1 } { oob 0 }} {
	global repenv
	global drop

	set masterenv $repenv(master)
	# Count the client handles: repenv(N) holds NULL after the last one.
	for { set i 0 } { 1 } { incr i } {
		if { $repenv($i) == "NULL"} {
			break
		}
	}
	error_check_good i_nclients $nclients $i

	set name [format "Repl%03d" $tnum]
	berkdb debug_check
	puts -nonewline "\t$name: Processing master/$i client queues"
	set rand_skip 0
	if { $oob } {
		puts " out-of-order"
	} else {
		puts " in order"
	}
	set do_check 1
	# Remember the drop setting; it is cleared below before flushing and
	# restored on the way out.
	set droprestore $drop
	while { 1 } {
		set nproced 0

		if { $oob } {
			set rand_skip [berkdb random_int 2 10]
		}
		incr nproced [replprocessqueue $masterenv 1 $rand_skip]
		for { set i 0 } { $i < $nclients } { incr i } {
			set envid [expr $i + 2]
			if { $oob } {
				set rand_skip [berkdb random_int 2 10]
			}
			set n [replprocessqueue $repenv($i) \
			    $envid $rand_skip]
			incr nproced $n
		}

		if { $nproced == 0 } {
			# Now that we delay requesting records until
			# we've had a few records go by, we should always
			# see that the number of requests is lower than the
			# number of messages that were enqueued.
			for { set i 0 } { $i < $nclients } { incr i } {
				set clientenv $repenv($i)
				set stats [$clientenv rep_stat]
				set queued [getstats $stats \
				    {Total log records queued}]
				error_check_bad queued_stats \
				    $queued -1
				set requested [getstats $stats \
				    {Log records requested}]
				error_check_bad requested_stats \
				    $requested -1
				if { $queued != 0 && $do_check != 0 } {
					error_check_good num_requested \
					    [expr $requested < $queued] 1
				}

				$clientenv rep_request 1 1
			}

			# If we were dropping messages, we might need
			# to flush the log so that we get everything
			# and end up in the right state.
			if { $drop != 0 } {
				set drop 0
				set do_check 0
				$masterenv rep_flush
				berkdb debug_check
				puts "\t$name: Flushing Master"
			} else {
				break
			}
		}
	}

	# Reset the clients back to the default state in case we
	# have more processing to do.
	for { set i 0 } { $i < $nclients } { incr i } {
		set clientenv $repenv($i)
		$clientenv rep_request 4 128
	}
	set drop $droprestore
}

# Verify that the directories in the master are exactly replicated in
# each of the client environments.
#
#	tnum     - test number, used only for the "ReplNNN" message prefix
#	method   - access method; record-number databases are compared
#	           unsorted, all others are sorted before comparison
#	nclients - number of client environments to check
proc repl_envver0 { tnum method { nclients 1 } } {
	global clientdir
	global masterdir
	global repenv

	# Verify the database in the client dir.
	# First dump the master.
	set t1 $masterdir/t1
	set t2 $masterdir/t2
	set t3 $masterdir/t3
	set omethod [convert_method $method]
	set name [format "Repl%03d" $tnum]

	#
	# We are interested in the keys of whatever databases are present
	# in the master environment, so we just call a no-op check function
	# since we have no idea what the contents of this database really is.
	# We just need to walk the master and the clients and make sure they
	# have the same contents.
	#
	set cwd [pwd]
	cd $masterdir
	set stat [catch {glob test*.db} dbs]
	cd $cwd
	# glob raises an error when nothing matches: nothing to verify.
	if { $stat == 1 } {
		return
	}
	foreach testfile $dbs {
		# Dump the master's copy into t2 (sorted unless recno).
		open_and_dump_file $testfile $repenv(master) $masterdir/t2 \
		    repl_noop dump_file_direction "-first" "-next"

		if { [string compare [convert_method $method] -recno] != 0 } {
			filesort $t2 $t3
			file rename -force $t3 $t2
		}
		for { set i 0 } { $i < $nclients } { incr i } {
			puts "\t$name: Verifying client $i database \
			    $testfile contents."
			# Dump the client's copy into t1, normalize it into
			# t3, and compare against the master's dump.
			open_and_dump_file $testfile $repenv($i) \
			    $t1 repl_noop dump_file_direction "-first" "-next"

			if { [string compare $omethod "-recno"] != 0 } {
				filesort $t1 $t3
			} else {
				catch {file copy -force $t1 $t3} ret
			}
			error_check_good diff_files($t2,$t3) [filecmp $t2 $t3] 0
		}
	}
}

# Remove all the elements from the master and verify that these
# deletions properly propagated to the clients.
#
#	tnum     - test number, used only for the "ReplNNN" message prefix
#	method   - unused in this proc
#	nclients - number of client environments to check
proc repl_verdel { tnum method { nclients 1 } } {
	global clientdir
	global masterdir
	global repenv

	# Delete all items in the master.
	set name [format "Repl%03d" $tnum]
	set cwd [pwd]
	cd $masterdir
	set stat [catch {glob test*.db} dbs]
	cd $cwd
	# glob raises an error when nothing matches: nothing to delete.
	if { $stat == 1 } {
		return
	}
	foreach testfile $dbs {
		puts "\t$name: Deleting all items from the master."
		set txn [$repenv(master) txn]
		error_check_good txn_begin [is_valid_txn $txn \
		    $repenv(master)] TRUE
		set db [berkdb_open -txn $txn -env $repenv(master) $testfile]
		error_check_good reopen_master [is_valid_db $db] TRUE
		set dbc [$db cursor -txn $txn]
		error_check_good reopen_master_cursor \
		    [is_valid_cursor $dbc $db] TRUE
		for { set dbt [$dbc get -first] } { [llength $dbt] > 0 } \
		    { set dbt [$dbc get -next] } {
			error_check_good del_item [$dbc del] 0
		}
		error_check_good dbc_close [$dbc close] 0
		error_check_good txn_commit [$txn commit] 0
		error_check_good db_close [$db close] 0

		# Deliver the deletions to the clients.
		repl_envprocq $tnum $nclients

		# Check clients.
		for { set i 0 } { $i < $nclients } { incr i } {
			puts "\t$name: Verifying emptiness of client database $i."

			set db [berkdb_open -env $repenv($i) $testfile]
			error_check_good reopen_client($i) \
			    [is_valid_db $db] TRUE
			set dbc [$db cursor]
			error_check_good reopen_client_cursor($i) \
			    [is_valid_cursor $dbc $db] TRUE

			error_check_good client($i)_empty \
			    [llength [$dbc get -first]] 0

			error_check_good dbc_close [$dbc close] 0
			error_check_good db_close [$db close] 0
		}
	}
}

# Replication "check" function for the dump procs that expect to
# be able to verify the keys and data.  Accepts a key and a datum and
# does nothing with them.
proc repl_noop { k d } {
	return
}

# Close all the master and client environments in a replication test directory.
#
#	tnum    - test number, used only for the "ReplNNN" message prefix
#	envargs - the environment arguments the test ran with; if they
#	          contain an element matching "-encrypta*", the global
#	          encrypt is set to 1
proc repl_envclose { tnum envargs } {
	source ./include.tcl
	global clientdir
	global encrypt
	global masterdir
	global repenv
	global testdir

	if { [lsearch $envargs "-encrypta*"] !=-1 } {
		set encrypt 1
	}

	# In order to make sure that we have fully-synced and ready-to-verify
	# databases on all the clients, do a checkpoint on the master and
	# process messages in order to flush all the clients.
	#
	# NOTE(review): neither "drop" nor "do_check" is declared global in
	# this proc, so the two assignments below only create local variables;
	# they do not change the global drop setting read by replsend and
	# repl_envprocq.  repl_envprocq itself clears drop and flushes the
	# master when dropping is enabled.  Left as-is to preserve behavior.
	set drop 0
	set do_check 0
	set name [format "Repl%03d" $tnum]
	berkdb debug_check
	puts "\t$name: Checkpointing master."
	error_check_good masterenv_ckp [$repenv(master) txn_checkpoint] 0

	# Count clients.
	for { set ncli 0 } { 1 } { incr ncli } {
		if { $repenv($ncli) == "NULL" } {
			break
		}
	}
	repl_envprocq $tnum $ncli

	error_check_good masterenv_close [$repenv(master) close] 0
	verify_dir $masterdir "\t$name: " 0 0 1
	for { set i 0 } { $i < $ncli } { incr i } {
		error_check_good client($i)_close [$repenv($i) close] 0
		verify_dir $clientdir($i) "\t$name: " 0 0 1
	}
	replclose $testdir/MSGQUEUEDIR

}

# Close up a replication group: close every message-queue database listed
# in machids, close the queue environment, and empty machids.
# The queuedir argument is not used.
proc replclose { queuedir } {
	global queueenv queuedbs machids

	foreach m $machids {
		set db $queuedbs($m)
		error_check_good dbr_close [$db close] 0
	}
	error_check_good qenv_close [$queueenv close] 0
	set machids {}
}

# Create a replication group for testing: make queuedir, open the message
# queue environment there, and reset queuedbs and machids.
# Returns the queue environment handle.
proc replsetup { queuedir } {
	global queueenv queuedbs machids

	file mkdir $queuedir
	set queueenv \
	    [berkdb_env -create -txn -lock_max 20000 -home $queuedir]
	error_check_good queueenv [is_valid_env $queueenv] TRUE

	if { [info exists queuedbs] } {
		unset queuedbs
	}
	set machids {}

	return $queueenv
}

# Send function for replication.  Appends {control rec fromid} to the
# message queue of machine toid, or of every registered machine other than
# the sender when toid is -1.  Always returns 0.
proc replsend { control rec fromid toid } {
	global queuedbs queueenv machids
	global drop drop_msg

	#
	# If we are testing with dropped messages, then we drop every
	# $drop_msg time.  If we do that just return 0 and don't do
	# anything.
	#
	# drop doubles as the enable switch (nonzero) and the running
	# counter; it is reset to 1, not 0, after a message is dropped.
	if { $drop != 0 } {
		incr drop
		if { $drop == $drop_msg } {
			set drop 1
			return 0
		}
	}
	# XXX
	# -1 is DB_BROADCAST_MID
	if { $toid == -1 } {
		set machlist $machids
	} else {
		if { [info exists queuedbs($toid)] != 1 } {
			error "replsend: machid $toid not found"
		}
		set machlist [list $toid]
	}

	foreach m $machlist {
		# XXX should a broadcast include to "self"?
		if { $m == $fromid } {
			continue
		}

		set db $queuedbs($m)
		set txn [$queueenv txn]
		$db put -txn $txn -append [list $control $rec $fromid]
		error_check_good replsend_commit [$txn commit] 0
	}

	return 0
}

# Nuke all the pending messages for a particular site.
# Raises an error if machid has no message queue.
proc replclear { machid } {
	global queuedbs queueenv

	if { [info exists queuedbs($machid)] != 1 } {
		error "FAIL: replclear: machid $machid not found"
	}

	set db $queuedbs($machid)
	set txn [$queueenv txn]
	set dbc [$db cursor -txn $txn]
	for { set dbt [$dbc get -rmw -first] } { [llength $dbt] > 0 } \
	    { set dbt [$dbc get -rmw -next] } {
		error_check_good replclear($machid)_del [$dbc del] 0
	}
	error_check_good replclear($machid)_dbc_close [$dbc close] 0
	error_check_good replclear($machid)_txn_commit [$txn commit] 0
}

# Add a machine to a replication environment: create its message queue
# database (repqueue<machid>.db) and append machid to machids.
# Raises an error if machid is already registered.
proc repladd { machid } {
	global queueenv queuedbs machids

	if { [info exists queuedbs($machid)] == 1 } {
		error "FAIL: repladd: machid $machid already exists"
	}

	set queuedbs($machid) [berkdb open -auto_commit \
	    -env $queueenv -create -recno -renumber repqueue$machid.db]
	error_check_good repqueue_create [is_valid_db $queuedbs($machid)] TRUE

	lappend machids $machid
}

# Process a queue of messages, skipping every "skip_interval" entry.
# We traverse the entire queue, but since we skip some messages, we
# may end up leaving things in the queue, which should get picked up
# on a later run.
#
#	dbenv         - environment that processes the messages
#	machid        - machine id whose queue is read
#	skip_interval - 0 to process every message, otherwise see below
#	hold_electp   - optional name of a caller variable set to 1 when
#	                DB_REP_HOLDELECTION is reported, else 0
#	newmasterp    - optional name of a caller variable set to the
#	                new master's id when one is reported, else 0
#
# Returns the number of messages processed (skipped messages are counted).
proc replprocessqueue { dbenv machid { skip_interval 0 } \
    { hold_electp NONE } { newmasterp NONE } } {
	global queuedbs queueenv errorCode

	# hold_electp is a call-by-reference variable which lets our caller
	# know we need to hold an election.
	if { [string compare $hold_electp NONE] != 0 } {
		upvar $hold_electp hold_elect
	}
	set hold_elect 0

	# newmasterp is the same idea, only returning the ID of a master
	# given in a DB_REP_NEWMASTER return.
	if { [string compare $newmasterp NONE] != 0 } {
		upvar $newmasterp newmaster
	}
	set newmaster 0

	set nproced 0

	set txn [$queueenv txn]
	set dbc [$queuedbs($machid) cursor -txn $txn]

	error_check_good process_dbc($machid) \
	    [is_valid_cursor $dbc $queuedbs($machid)] TRUE

	for { set dbt [$dbc get -first] } \
	    { [llength $dbt] != 0 } \
	    { set dbt [$dbc get -next] } {
		set data [lindex [lindex $dbt 0] 1]

		# If skip_interval is nonzero, we want to process messages
		# out of order.  We do this in a simple but slimy way--
		# continue walking with the cursor without processing the
		# message or deleting it from the queue, but do increment
		# "nproced".  The way this proc is normally used, the
		# precise value of nproced doesn't matter--we just don't
		# assume the queues are empty if it's nonzero.  Thus,
		# if we contrive to make sure it's nonzero, we'll always
		# come back to records we've skipped on a later call
		# to replprocessqueue.  (If there really are no records,
		# we'll never get here.)
		#
		# Skip every skip_interval'th record (and use a remainder other
		# than zero so that we're guaranteed to really process at least
		# one record on every call).
		if { $skip_interval != 0 } {
			if { $nproced % $skip_interval == 1 } {
				incr nproced
				continue
			}
		}

		# We have to play an ugly cursor game here:  we currently
		# hold a lock on the page of messages, but rep_process_message
		# might need to lock the page with a different cursor in
		# order to send a response.  So save our recno, close
		# the cursor, and then reopen and reset the cursor.
		set recno [lindex [lindex $dbt 0] 0]
		error_check_good dbc_process_close [$dbc close] 0
		error_check_good txn_commit [$txn commit] 0
		# Each queued message is {control rec fromid}.
		set ret [catch {$dbenv rep_process_message \
		    [lindex $data 2] [lindex $data 0] [lindex $data 1]} res]
		set txn [$queueenv txn]
		set dbc [$queuedbs($machid) cursor -txn $txn]
		set dbt [$dbc get -set $recno]

		if { $ret != 0 } {
			if { [is_substr $res DB_REP_HOLDELECTION] } {
				set hold_elect 1
			} else {
				error "FAIL:[timestamp]\
				    rep_process_message returned $res"
			}
		}

		incr nproced

		# The message has been handled; remove it from the queue.
		$dbc del

		if { $ret == 0 && $res != 0 } {
			if { [is_substr $res DB_REP_NEWSITE] } {
				# NEWSITE;  do nothing.
			} else {
				set newmaster $res

				# Break as soon as we get a NEWMASTER message;
				# our caller needs to handle it.
				break
			}
		}

		if { $hold_elect == 1 } {
			# Break also on a HOLDELECTION, for the same reason.
			break
		}

	}

	error_check_good dbc_close [$dbc close] 0
	error_check_good txn_commit [$txn commit] 0

	# Return the number of messages processed.
	return $nproced
}

# Flag that introduces the replication argument in a test argument list.
set run_repl_flag "-run_repl"

# extract_repl_args --
#	Return the argument that follows the first occurrence of
#	$run_repl_flag in args, or "" if the flag is absent (or is the
#	last argument).
#
#	The whole argument list is scanned by index, so an empty-string
#	argument ahead of the flag does not end the search early.
proc extract_repl_args { args } {
	global run_repl_flag

	set nargs [llength $args]
	for { set i 0 } { $i < $nargs } { incr i } {
		if { [string compare [lindex $args $i] $run_repl_flag] == 0 } {
			return [lindex $args [expr {$i + 1}]]
		}
	}
	return ""
}

# delete_repl_args --
#	Return args with every occurrence of $run_repl_flag, and the
#	argument immediately following it, removed.
#
#	The whole argument list is scanned by index, so empty-string
#	arguments are preserved and do not truncate the result.
proc delete_repl_args { args } {
	global run_repl_flag

	set ret {}

	set nargs [llength $args]
	for { set i 0 } { $i < $nargs } { incr i } {
		set arg [lindex $args $i]
		if { [string compare $arg $run_repl_flag] != 0 } {
			lappend ret $arg
		} else {
			# Skip the flag's value as well.
			incr i
		}
	}
	return $ret
}

# elect_serial:            serial number of the most recently started election
# elections_in_progress:   array mapping election serial number to the
#                          channel of the sub-process running that election
global elect_serial
global elections_in_progress
set elect_serial 0

# Start an election in a sub-process.
# start_election --
#	Launch a tclsh sub-process that joins the replication group and
#	calls rep_elect, and remember its channel in
#	elections_in_progress.
#
#	qdir      - message queue directory passed to replsetup in the child
#	envstring - command that opens the child's environment
#	nsites    - number of sites passed to rep_elect
#	pri       - priority passed to rep_elect
#	timeout   - election timeout passed to rep_elect
#	err       - value for "$dbenv test abort"; when it is not "none",
#	            the abort setting is cleared afterwards and the
#	            election is run a second time
#
#	Returns the serial number identifying this election; pass it to
#	close_election.
#
#	The child's output goes to $testdir/ELECTION_OUTPUT.<serial> and
#	its environment errors to $testdir/ELECTION_ERRFILE.<serial>.
proc start_election { qdir envstring nsites pri timeout {err "none"}} {
	source ./include.tcl
	global elect_serial elections_in_progress machids

	incr elect_serial

	set t [open "|$tclsh_path >& $testdir/ELECTION_OUTPUT.$elect_serial" w]

	# Feed the child a script: load the test suite, attach to the
	# message queues of every known machine, then open its environment.
	puts $t "source $test_path/test.tcl"
	puts $t "replsetup $qdir"
	foreach i $machids { puts $t "repladd $i" }
	puts $t "set env_cmd \{$envstring\}"
	puts $t "set dbenv \[eval \$env_cmd -errfile \
	    $testdir/ELECTION_ERRFILE.$elect_serial -errpfx FAIL: \]"
# puts "Start election err $err, env $envstring"
	puts $t "\$dbenv test abort $err"
	# Use the caller-supplied timeout rather than the global
	# elect_timeout, so that the timeout argument is honored.
	puts $t "set res \[catch \{\$dbenv rep_elect $nsites $pri \
	    $timeout\} ret\]"
	if { $err != "none" } {
		puts $t "\$dbenv test abort none"
		puts $t "set res \[catch \{\$dbenv rep_elect $nsites $pri \
		    $timeout\} ret\]"
	}
	flush $t

	set elections_in_progress($elect_serial) $t
	return $elect_serial
}

# close_election --
#	Finish election i: tell the sub-process to close its environment,
#	close the channel to it, and forget the election.
proc close_election { i } {
	global elections_in_progress
	set t $elections_in_progress($i)
	puts $t "\$dbenv close"
	close $t
	unset elections_in_progress($i)
}

# cleanup_elections --
#	Close every election that is still in progress and reset the
#	election serial number to 0.
proc cleanup_elections { } {
	global elect_serial elections_in_progress

	for { set i 0 } { $i <= $elect_serial } { incr i } {
		if { [info exists elections_in_progress($i)] != 0 } {
			close_election $i
		}
	}

	set elect_serial 0
}