# See the file LICENSE for redistribution information.
#
# Copyright (c) 2002-2003
#	Sleepycat Software.  All rights reserved.
#
# $Id: rep005.tcl,v 1.2 2004/03/30 01:24:08 jtownsen Exp $
#
# TEST	rep005
# TEST	Replication election test with error handling.
# TEST
# TEST	Run a modified version of test001 in a replicated master environment;
# TEST	hold an election among a group of clients to make sure they select
# TEST	a proper master from amongst themselves, forcing errors at various
# TEST	locations in the election path.

# rep005 --
#	Driver for the election-with-errors test.
#
#	method	Access method under test; anything other than btree is skipped.
#	args	Extra arguments passed through unchanged to test001.
#
#	Sets up one master and $nclients clients, runs test001 in the master,
#	propagates and verifies the data on every client, closes the master,
#	and then runs one election (via rep005_elect) for every combination
#	of per-client error locations.
proc rep005 { method args } {
	source ./include.tcl

	if { [is_btree $method] == 0 } {
		puts "Rep005: Skipping for method $method."
		return
	}

	global rand_init
	error_check_good set_random_seed [berkdb srand $rand_init] 0

	set tnum "005"
	set niter 10
	set nclients 3

	env_cleanup $testdir

	set qdir $testdir/MSGQUEUEDIR
	replsetup $qdir

	set masterdir $testdir/MASTERDIR
	file mkdir $masterdir

	for { set i 0 } { $i < $nclients } { incr i } {
		set clientdir($i) $testdir/CLIENTDIR.$i
		file mkdir $clientdir($i)
	}

	puts -nonewline \
	    "Rep$tnum: Replication election error test with $nclients clients."
	puts -nonewline \
	    " Started at: "
	puts [clock format [clock seconds] -format "%H:%M %D"]

	# Open a master.
	repladd 1
	set env_cmd(M) "berkdb_env -create -log_max 1000000 -home $masterdir \
	    -txn nosync -rep_master -rep_transport \[list 1 replsend\]"
	# To debug elections, uncomment the line below and further below
	# for the clients to turn on verbose.  Also edit reputils.tcl
	# in proc start_election and swap the 2 commented lines with
	# their counterpart.
#	set env_cmd(M) "berkdb_env_noerr -create -log_max 1000000 \
#	    -home $masterdir -txn nosync -rep_master \
#	    -verbose {rep on} -errpfx MASTER -errfile /dev/stderr \
#	    -rep_transport \[list 1 replsend\]"
	set masterenv [eval $env_cmd(M)]
	error_check_good master_env [is_valid_env $masterenv] TRUE

	# Open the clients.  Client i uses replication environment id i + 2
	# (id 1 is the master).
	for { set i 0 } { $i < $nclients } { incr i } {
		set envid [expr $i + 2]
		repladd $envid
		set env_cmd($i) "berkdb_env -create -home $clientdir($i) \
		    -txn nosync -rep_client \
		    -rep_transport \[list $envid replsend\]"
#		set env_cmd($i) "berkdb_env_noerr -create -home $clientdir($i) \
#		    -verbose {rep on} -errpfx CLIENT$i -errfile /dev/stderr \
#		    -txn nosync -rep_client \
#		    -rep_transport \[list $envid replsend\]"
		set clientenv($i) [eval $env_cmd($i)]
		error_check_good \
		    client_env($i) [is_valid_env $clientenv($i)] TRUE
	}

	# Run a modified test001 in the master.
	puts "\tRep$tnum.a: Running test001 in replicated env."
	eval test001 $method $niter 0 0 $tnum -env $masterenv $args

	# Loop, processing first the master's messages, then the client's,
	# until both queues are empty.
	while { 1 } {
		set nproced 0

		incr nproced [replprocessqueue $masterenv 1]

		for { set i 0 } { $i < $nclients } { incr i } {
			set envid [expr $i + 2]
			incr nproced [replprocessqueue $clientenv($i) $envid]
		}

		if { $nproced == 0 } {
			break
		}
	}

	# Verify the database in the client dir.  The scratch files live in
	# the master's home directory; a dedicated variable is used for that
	# path so that testdir (set by include.tcl) is not overwritten.
	set masterhome [get_home $masterenv]
	set t1 $masterhome/t1
	set t2 $masterhome/t2
	set t3 $masterhome/t3
	for { set i 0 } { $i < $nclients } { incr i } {
		puts "\tRep$tnum.b: Verifying contents of client database $i."
		open_and_dump_file test$tnum.db $clientenv($i) $t1 \
		    test001.check dump_file_direction "-first" "-next"

		if { [string compare [convert_method $method] -recno] != 0 } {
			filesort $t1 $t3
		}
		error_check_good diff_files($t2,$t3) [filecmp $t2 $t3] 0

		verify_dir $clientdir($i) "\tRep$tnum.c: " 0 0 1
	}

	# Make sure all the clients are synced up and ready to be good
	# voting citizens.
	error_check_good master_flush [$masterenv rep_flush] 0
	while { 1 } {
		set nproced 0
		incr nproced [replprocessqueue $masterenv 1 0]
		for { set i 0 } { $i < $nclients } { incr i } {
			incr nproced [replprocessqueue $clientenv($i) \
			    [expr $i + 2] 0]
		}

		if { $nproced == 0 } {
			break
		}
	}

	# The elections are held among the clients only: close the master
	# and discard anything still queued for the clients.
	error_check_good masterenv_close [$masterenv close] 0

	for { set i 0 } { $i < $nclients } { incr i } {
		replclear [expr $i + 2]
	}
	#
	# We set up the error list for each client.  We know that the
	# first client is the one calling the election, therefore, add
	# the error location on sending the message (electsend) for that one.
	set m "Rep$tnum"
	set count 0
	#
	# A full test can take a long time to run.  For normal testing
	# pare it down a lot so that it runs in a shorter time.
	#
	set c0err { none electinit none none }
	set c1err $c0err
	set c2err $c0err
	set numtests [expr [llength $c0err] * [llength $c1err] * \
	    [llength $c2err]]
	puts "\t$m.d: Starting $numtests election with error tests"
	# win and last_win are passed by name; rep005_elect updates them so
	# that state carries over from one election to the next.
	set last_win -1
	set win -1
	foreach c0 $c0err {
		foreach c1 $c1err {
			foreach c2 $c2err {
				set elist [list $c0 $c1 $c2]
				rep005_elect env_cmd clientenv $qdir $m \
				    $count win last_win $elist
				incr count
			}
		}
	}

	for { set i 0 } { $i < $nclients } { incr i } {
		error_check_good clientenv_close($i) [$clientenv($i) close] 0
	}

	# Close the message queue directory that replsetup created.
	replclose $qdir
	puts -nonewline \
	    "Rep$tnum: Completed at: "
	puts [clock format [clock seconds] -format "%H:%M %D"]
}

# rep005_elect --
#	Hold a single election among the clients and check the result.
#
#	ecmd	 Name of the caller's array of environment-open commands,
#		 indexed by client number.
#	cenv	 Name of the caller's array of client environment handles.
#	qdir	 Message queue directory handed to start_election.
#	msg	 Prefix for progress messages.
#	count	 Sequence number of this election; used in messages and to
#		 decide when the new master creates a database.
#	winner	 Name of the caller's variable holding the index of the
#		 current master, or -1 if there is none.  Updated here.
#	lsn_lose Name of the caller's variable holding the index of the
#		 client whose environment was cleaned after the previous
#		 election, or -1.  Updated here.
#	elist	 List with one error location per client, passed to
#		 start_election for that client.
proc rep005_elect { ecmd cenv qdir msg count winner lsn_lose elist } {
	global elect_timeout elect_serial
	global is_windows_test
	upvar $ecmd env_cmd
	upvar $cenv clientenv
	upvar $winner win
	upvar $lsn_lose last_win

	set elect_timeout 5000000
	set nclients [llength $elist]
	set nsites [expr $nclients + 1]

	set cl_list {}
	for { set i 0 } { $i < $nclients } { incr i } {
		set err_cmd($i) [lindex $elist $i]
		set elect_pipe($i) INVALID
		replclear [expr $i + 2]
		lappend cl_list $i
	}

	# Select winner.  We want to test biggest LSN wins, and secondarily
	# highest priority wins.  If we already have a master, make sure
	# we don't start a client in that master.
	set el 0
	if { $win == -1 } {
		if { $last_win != -1 } {
			set cl_list [lreplace $cl_list $last_win $last_win]
			set el $last_win
		}
		# Pick uniformly over the whole candidate list.  The index
		# range starts at 0 so that the first candidate can be chosen.
		set windex [berkdb random_int 0 [expr [llength $cl_list] - 1]]
		set win [lindex $cl_list $windex]
	} else {
		# Easy case, if we have a master, the winner must be the
		# same one as last time, just use $win.
		# If client0 is the current existing master, start the
		# election in client 1.
		if {$win == 0} {
			set el 1
		}
	}
	# Winner has priority 100.  If we are testing LSN winning, then
	# make sure the lowest LSN client has the highest priority.
	# Everyone else has priority 10.
	for { set i 0 } { $i < $nclients } { incr i } {
		if { $i == $win } {
			set pri($i) 100
		} elseif { $i == $last_win } {
			set pri($i) 200
		} else {
			set pri($i) 10
		}
	}

	puts "\t$msg.d.$count: Start election (win=client$win) $elist"
	incr elect_serial
	set pfx "CHILD$el.$elect_serial"
	# Windows requires a longer timeout.
	if { $is_windows_test == 1 } {
		set elect_timeout [expr $elect_timeout * 3]
	}
	set elect_pipe($el) [start_election $pfx $qdir $env_cmd($el) \
	    $nsites $pri($el) $elect_timeout $err_cmd($el)]

	tclsleep 2

	set got_newmaster 0
	set tries 10
	while { 1 } {
		set nproced 0
		set he 0
		set nm 0
		set nm2 0

		for { set i 0 } { $i < $nclients } { incr i } {
			set he 0
			set envid [expr $i + 2]
			set child_done [check_election $elect_pipe($i) nm2]
			if { $got_newmaster == 0 && $nm2 != 0 } {
				error_check_good newmaster_is_master $nm2 \
				    [expr $win + 2]
				set got_newmaster $nm2

				# If this env is the new master, it needs to
				# configure itself as such--this is a different
				# env handle from the one that performed the
				# election.
				if { $nm2 == $envid } {
					error_check_good make_master($i) \
					    [$clientenv($i) rep_start -master] \
					    0
				}
			}
			incr nproced \
			    [replprocessqueue $clientenv($i) $envid 0 he nm]
# puts "Tries $tries: Processed queue for client $i, $nproced msgs he $he nm $nm nm2 $nm2"
			if { $he == 1 } {
				#
				# Only close down the election pipe if the
				# previously created one is done and
				# waiting for new commands, otherwise
				# if we try to close it while it's in
				# progress we hang this main tclsh.
				#
				if { $elect_pipe($i) != "INVALID" && \
				    $child_done == 1 } {
					close_election $elect_pipe($i)
					set elect_pipe($i) "INVALID"
				}
# puts "Starting election on client $i"
				if { $elect_pipe($i) == "INVALID" } {
					incr elect_serial
					set pfx "CHILD$i.$elect_serial"
					set elect_pipe($i) [start_election \
					    $pfx $qdir \
					    $env_cmd($i) $nsites $pri($i) \
					    $elect_timeout $err_cmd($i)]
					set got_hold_elect($i) 1
				}
			}
			if { $nm != 0 } {
				error_check_good newmaster_is_master $nm \
				    [expr $win + 2]
				set got_newmaster $nm

				# If this env is the new master, it needs to
				# configure itself as such--this is a different
				# env handle from the one that performed the
				# election.
				if { $nm == $envid } {
					error_check_good make_master($i) \
					    [$clientenv($i) rep_start -master] \
					    0
					# On every tenth election, have the
					# new master create a database.
					if { [expr $count % 10] == 0 } {
						set dbname rep005.$count.db
						set db [berkdb_open -env \
						    $clientenv($i) \
						    -auto_commit \
						    -create -btree $dbname]
						error_check_good dbopen \
						    [is_valid_db $db] TRUE
						error_check_good dbclose \
						    [$db close] 0
					}
				}
			}
		}

		# We need to wait around to make doubly sure that the
		# election has finished...
		if { $nproced == 0 } {
			incr tries -1
			if { $tries == 0 } {
				break
			} else {
				tclsleep 1
			}
		} else {
			set tries 10
		}
	}

	# Verify that the expected client ($win) is actually the winner.
	error_check_good "client $win wins" $got_newmaster [expr $win + 2]

	cleanup_elections

	#
	# Make sure that we've really processed all the post-election
	# sync-up messages.
	#
	while { 1 } {
		set nproced 0
		for { set i 0 } { $i < $nclients } { incr i } {
			incr nproced [replprocessqueue $clientenv($i) \
			    [expr $i + 2] 0]
		}
		if { $nproced == 0 } {
			break
		}
	}

	#
	# Sometimes test elections with an existing master.
	# Other times test elections without master by closing the
	# master we just elected and creating a new client.
	# We want to weight it to close the new master.  So, use
	# a list to cause closing about 70% of the time.
	#
	set close_list { 0 0 0 1 1 1 1 1 1 1}
	set close_len [expr [llength $close_list] - 1]
	set close_index [berkdb random_int 0 $close_len]
	if { [lindex $close_list $close_index] == 1 } {
		puts -nonewline "\t$msg.e.$count: Closing "
		error_check_good newmaster_close [$clientenv($win) close] 0
		#
		# If the next test should win via LSN then remove the
		# env before starting the new client so that we
		# can guarantee this client doesn't win the next one.
		set lsn_win { 0 0 0 0 1 1 1 1 1 1 }
		set lsn_len [expr [llength $lsn_win] - 1]
		set lsn_index [berkdb random_int 0 $lsn_len]
		if { [lindex $lsn_win $lsn_index] == 1 } {
			set last_win $win
			# Pull the home directory out of the env command:
			# it is the word following -home.
			set dirindex [lsearch -exact $env_cmd($win) "-home"]
			incr dirindex
			set lsn_dir [lindex $env_cmd($win) $dirindex]
			env_cleanup $lsn_dir
			puts -nonewline "and cleaning "
		} else {
			set last_win -1
		}
		puts "new master, new client $win"
		set clientenv($win) [eval $env_cmd($win)]
		error_check_good cl($win) [is_valid_env $clientenv($win)] TRUE
		# No master exists any more; the next election picks a
		# fresh winner.
		set win -1
		#
		# Since we started a new client we want to give them
		# all a chance to process everything outstanding before
		# the election on the next iteration.
		while { 1 } {
			set nproced 0
			for { set i 0 } { $i < $nclients } { incr i } {
				incr nproced [replprocessqueue $clientenv($i) \
				    [expr $i + 2] 0]
			}
			if { $nproced == 0 } {
				break
			}
		}
	}
}