-module(partitions_SUITE).
%% Common Test suites export everything by convention; nowarn_export_all
%% silences the compiler warning that a bare export_all triggers.
-compile([export_all, nowarn_export_all]).

-include_lib("common_test/include/ct.hrl").
-include_lib("proper/include/proper.hrl").
-include_lib("eunit/include/eunit.hrl").

%% Common Test entry point: the suite consists of the single group `tests'.
all() ->
    [
     {group, tests}
    ].

%% Group definitions. Only enq_drain_basic is enabled; the property based
%% case prop_enq_drain is listed but commented out.
groups() ->
    Tests = [
             enq_drain_basic
             % prop_enq_drain
            ],
    [{tests, [], Tests}].

%% No group level setup is required.
init_per_group(_, Config) ->
    Config.

%% No group level teardown is required.
end_per_group(_, _Config) ->
    ok.

%% Per test case setup.
%%
%% For the `print' case only the configuration entries (cluster_name, nodes,
%% servers, name) are added; no node is started.
%% For every other case the five nodes are started through
%% prepare_erlang_cluster/2 and a ra cluster named after the test case,
%% running the ra_fifo machine, is set up on them before the extended
%% configuration is returned.
init_per_testcase(print, Config0) ->
    Nodes = erlang_nodes(5),
    Servers = [{print, N} || N <- Nodes],
    [{cluster_name, print},
     {nodes, Nodes}, {servers, Servers},
     {name, print} | Config0];
init_per_testcase(TestCase, Config0) ->
    Nodes = erlang_nodes(5),
    Servers = [{TestCase, N} || N <- Nodes],
    Config1 = prepare_erlang_cluster(Config0, Nodes),
    Config = [{cluster_name, TestCase},
              {nodes, Nodes}, {servers, Servers},
              {name, TestCase} | Config1],
    Machine = {module, ra_fifo, #{}},
    ServerId = setup_ra_cluster(Config, Machine),
    %% Make sure nodes are synchronised
    ct:pal("Members ~p~n", [ra:members(ServerId)]),
    Config.

%% Per test case teardown. The `print' case started no nodes, so there is
%% nothing to stop; every other case stops the nodes recorded in Config.
end_per_testcase(print, Config) ->
    Config;
end_per_testcase(_, Config) ->
    Nodes = ?config(nodes, Config),
    ct:pal("end_per_testcase: Stopping nodes ~p~n", [Nodes]),
    erlang_node_helpers:stop_erlang_nodes(Nodes),
    ct:pal("end_per_testcase: Stopped nodes ~p~n", [Nodes]),
    ok.

%% The five node names used by the suite (see erlang_nodes/1).
-type nodes5() :: foo1@localhost |
                  foo2@localhost |
                  foo3@localhost |
                  foo4@localhost |
                  foo5@localhost.

%% Scenario steps carrying a duration in the 500..10000 range.
-type actions() :: {part, [nodes5()], 500..10000} |
                   {wait, 500..10000}.

-type wait_time() :: 500..10000.

%% Property based variant of the enqueue/drain test.
%%
%% Generates a non-empty list of scenario steps ({wait, T}, heal or
%% {part, [Node, Node], T}), wraps it between an initial wait and a final
%% heal + wait, and runs do_enq_drain_scenario/4 on it. The property is
%% executed 10 times through run_proper/3.
prop_enq_drain(Config) ->
    ClusterName = ?config(cluster_name, Config),
    Nodes = ?config(nodes, Config),
    Servers = ?config(servers, Config),
    run_proper(
      fun () ->
              ?FORALL(S, resize(
                           10,
                           non_empty(
                             list(
                               oneof([{wait, wait_time()},
                                      heal,
                                      {part, vector(2, nodes5()),
                                       wait_time()}])))),
                      do_enq_drain_scenario(ClusterName,
                                            Nodes, Servers,
                                            [{wait, 5000}] ++ S ++
                                            [heal, {wait, 5000}]))
      end, [], 10).

%% Logs the given scenario and always returns true.
print_scenario(Scenario) ->
    ct:pal("Scenario ~p~n", [Scenario]),
    true.

%% Fixed scenario: wait, partition a random subset of nodes, restart the
%% application on a random subset of servers, wait, partition again for
%% longer, wait. The test passes when do_enq_drain_scenario/4 returns true.
enq_drain_basic(Config) ->
    ClusterName = ?config(cluster_name, Config),
    Nodes = ?config(nodes, Config),
    Servers = ?config(servers, Config),
    Scenario = [{wait, 5000},
                {part, select_some(Nodes), 5000},
                {app_restart, select_some(Servers)},
                {wait, 5000},
                {part, select_some(Nodes), 20000},
                {wait, 5000}],
    true = do_enq_drain_scenario(ClusterName, Nodes, Servers, Scenario).

%% Runs Scenario through the nemesis process while two enqueuers publish
%% messages, then drains the queue and checks the outcome.
%%
%% Returns true when every message reported as applied by either enqueuer was
%% drained and the largest drained message equals the expected message count
%% (or nothing was expected and nothing was drained); false otherwise.
%% Crashes (badmatch) if any of the helper processes report something other
%% than the expected result.
do_enq_drain_scenario(ClusterName, Nodes, Servers, Scenario) ->
    ct:pal("Running ~p~n", [Scenario]),
    NemConf = #{nodes => Nodes,
                scenario => Scenario},
    %% total scenario duration plus a fixed 5000 allowance
    ScenarioTime = scenario_time(Scenario, 5000),
    {ok, Nem} = nemesis:start_link(NemConf),
    EnqInterval = 1000,
    %% one message per interval over the scenario duration (allowance removed)
    NumMessages = abs(erlang:trunc((ScenarioTime - 5000) / EnqInterval)),
    EnqConf = #{cluster_name => ClusterName,
                servers => Servers,
                num_messages => NumMessages,
                spec => {EnqInterval, custard}},
    %% the second enqueuer is handed the servers in reverse order
    EnqConf2 = EnqConf#{servers => lists:reverse(Servers)},
    {ok, Enq} = enqueuer:start_link(enq_one, EnqConf),
    {ok, Enq2} = enqueuer:start_link(enq_two, EnqConf2),
    ct:pal("enqueue_checkout wait_on_scenario ~n", []),
    ok = nemesis:wait_on_scenario(Nem, ScenarioTime * 2),
    {applied, Applied, _} = enqueuer:wait(Enq, ScenarioTime * 2),
    {applied, Applied2, _} = enqueuer:wait(Enq2, ScenarioTime * 2),
    ct:pal("enqueuer:wait ~p ~n", [Applied]),
    ct:pal("enqueuer:wait ~p ~n", [Applied2]),
    proc_lib:stop(Nem),
    proc_lib:stop(Enq),
    %% Stop the second enqueuer as well; it used to be left running after the
    %% scenario. NOTE(review): enq_one/enq_two look like registered names, in
    %% which case a leftover enq_two would break a repeated run - confirm.
    proc_lib:stop(Enq2),
    validate_machine_state(Servers),
    Received = drain(ClusterName, Servers),
    validate_machine_state(Servers),
    ct:pal("Expected ~p~nApplied ~p~nReceived ~p~nScenario: ~p~n",
           [NumMessages, Applied, Received, Scenario]),
    % assert no messages were lost
    Remaining = (Applied ++ Applied2) -- Received,
    ct:pal("Remaining ~p~n", [Remaining]),
    Remaining =:= [] andalso highest_received_matches(Received, NumMessages).

%% True when the largest drained message equals NumMessages. An empty drain
%% only matches when no message was expected; lists:max/1 is not defined on
%% the empty list, so that case is handled explicitly.
highest_received_matches([], NumMessages) ->
    NumMessages =:= 0;
highest_received_matches(Received, NumMessages) ->
    lists:max(Received) =:= NumMessages.

%% Checks that all servers report the same machine state, retrying up to
%% 10 times.
validate_machine_state(Servers) ->
    validate_machine_state(Servers, 10).

%% Retry loop behind validate_machine_state/1.
%% When the retry budget is exhausted the current states are collected and
%% the process exits with {validate_machine_state_failed, States}.
validate_machine_state(Servers, 0) ->
    MacStates = [begin
                     {ok, S, _} = ra:local_query(N, fun ra_lib:id/1),
                     S
                 end || N <- Servers],
    exit({validate_machine_state_failed, MacStates});
validate_machine_state(Servers, Num) ->
    % give the cluster a bit of time to settle first
    timer:sleep(500),
    MacStates = [begin
                     {ok, {IT, _} = S, _} = ra:local_query(N, fun ra_lib:id/1),
                     ct:pal("validating ~w at ~w", [N, IT]),
                     S
                 end || N <- Servers],
    H = hd(MacStates),
    case lists:all(fun (S) -> H =:= S end, MacStates) of
        true ->
            ct:pal("machine state are valid", []),
            ok;
        false ->
            validate_machine_state(Servers, Num - 1)
    end.

%% Picks trunc(length(Servers) / 2) distinct random elements from Servers.
select_some(Servers) ->
    N = trunc(length(Servers) / 2),
    element(1,
            lists:foldl(fun (_, {Selected, Rem0}) ->
                                {S, Rem} = random_element(Rem0),
                                {[S | Selected], Rem}
                        end, {[], Servers}, lists:seq(1, N))).

%% Returns {Element, Rest}: a uniformly chosen element of the non-empty list
%% Nodes and the list with the first occurrence of that element removed.
random_element(Nodes) ->
    Selected = lists:nth(rand:uniform(length(Nodes)), Nodes),
    {Selected, lists:delete(Selected, Nodes)}.

%% Sums the time budget of a scenario on top of the accumulator Acc.
%% `heal' contributes nothing, {app_restart, _} contributes a fixed 100, and
%% any other 2- or 3-tuple step contributes its last element.
scenario_time([], Acc) ->
    Acc;
scenario_time([heal | Rest], Acc) ->
    scenario_time(Rest, Acc);
scenario_time([{app_restart, _} | Rest], Acc) ->
    scenario_time(Rest, Acc + 100);
scenario_time([{_, T} | Rest], Acc) ->
    scenario_time(Rest, Acc + T);
scenario_time([{_, _, T} | Rest], Acc) ->
    scenario_time(Rest, Acc + T).


%% Dequeues everything from the queue of ClusterName and returns the message
%% payloads (most recently dequeued first).
drain(ClusterName, Nodes) ->
    ct:pal("draining ~w", [ClusterName]),
    F = ra_fifo_client:init(ClusterName, Nodes),
    drain0(F, []).

%% Dequeue loop behind drain/2: collects {custard, Msg} payloads until the
%% queue reports empty. Any other dequeue result is logged and the process
%% exits with that result as the reason.
drain0(S0, Acc) ->
    case ra_fifo_client:dequeue(<<"c1">>, settled, S0) of
        {ok, {_, {_, {custard, Msg}}}, S} ->
            drain0(S, [Msg | Acc]);
        {ok, empty, _} ->
            Acc;
        Err ->
            %% oh dear
            ct:pal("drain failed after draining ~w~n with ~w", [Acc, Err]),
            exit(Err)
    end.

%% Returns the node names foo1@localhost .. fooN@localhost.
%% Previously only N = 5 was accepted; the result for 5 is unchanged.
%% list_to_atom/1 is safe here because the input is a small integer supplied
%% by the suite itself, never external data.
erlang_nodes(N) when is_integer(N), N >= 1 ->
    [list_to_atom("foo" ++ integer_to_list(I) ++ "@localhost")
     || I <- lists:seq(1, N)].

%% Configures the distribution proxy, starts the given Erlang nodes and
%% returns the configuration produced by the proxy helper.
prepare_erlang_cluster(Config, Nodes) ->
    Config0 = tcp_inet_proxy_helpers:configure_dist_proxy(Config),
    erlang_node_helpers:start_erlang_nodes(Nodes, Config0),
    Config0.

%% Starts the ra application and one ra server on every node listed in
%% Config, then triggers an election on the server of the first node.
%%
%% Config must contain `nodes', `name' and `priv_dir'. Returns the server id
%% {Name, FirstNode} the election was triggered on. Any step that does not
%% return `ok' crashes the caller with a badmatch.
setup_ra_cluster(Config, Machine) ->
    Nodes = ?config(nodes, Config),
    Name = ?config(name, Config),
    DataDir = data_dir(Config),
    ok = ra_lib:make_dir(DataDir),

    Configs = lists:map(
                fun(Node) ->
                        ct:pal("Start app on ~p~n", [Node]),
                        C = make_server_config(Name, Nodes, Node, Machine),
                        ok = ct_rpc:call(Node, ?MODULE, node_setup, [DataDir]),
                        ok = ct_rpc:call(Node, application, load, [ra]),
                        ok = ct_rpc:call(Node, application, set_env,
                                         [ra, data_dir, [DataDir]]),
                        ok = ct_rpc:call(Node, ra, start, []),

                        ok = ct_rpc:call(Node, logger, set_primary_config,
                                         [level, all]),
                        C
                end,
                Nodes),
    %% Servers are started only after the application runs on every node.
    %% The result is not needed, hence foreach rather than map.
    lists:foreach(fun(#{id := {_, Node}} = ServerConfig) ->
                          ct:pal("Start ra server on ~p~n", [Node]),
                          ok = ct_rpc:call(Node, ra, start_server,
                                           [ServerConfig])
                  end,
                  Configs),
    ServerId = {Name, hd(Nodes)},
    ok = ra:trigger_election(ServerId),
    ServerId.

%% Logging setup executed on each cluster node (called through ct_rpc from
%% setup_ra_cluster/2): installs a file based logger handler at debug level,
%% points the sasl error logger at a file under the node's directory,
%% restarts sasl so the setting takes effect, and turns off tty output.
node_setup(DataDir) ->
    ok = ra_lib:make_dir(DataDir),
    NodeDir = atom_to_list(node()),
    LogFile = filename:join([DataDir, NodeDir, "ra.log"]),
    SaslFile = filename:join([DataDir, NodeDir, "ra_sasl.log"]),
    logger:set_primary_config(level, debug),
    Config = #{config => #{type => {file, LogFile}}, level => debug},
    logger:add_handler(ra_handler, logger_std_h, Config),
    application:load(sasl),
    application:set_env(sasl, sasl_error_logger, {file, SaslFile}),
    application:stop(sasl),
    application:start(sasl),
    _ = error_logger:tty(false),
    ok.

%% Directory used for ra data: the "part" sub-directory of the priv_dir
%% entry in Config.
data_dir(Config) ->
    Cwd = ?config(priv_dir, Config),
    filename:join(Cwd, "part").

%% Builds the ra server configuration map for the server {Name, Node}.
%% All of Nodes are listed as initial members under the same Name, and the
%% uid (also passed in log_init_args) is the cluster name as a UTF-8 binary.
make_server_config(Name, Nodes, Node, Machine) ->
    %% computed once so the server uid and the log uid cannot drift apart
    Uid = atom_to_binary(Name, utf8),
    #{cluster_name => Name,
      id => {Name, Node},
      uid => Uid,
      initial_members => [{Name, N} || N <- Nodes],
      log_init_args => #{uid => Uid},
      machine => Machine,
      await_condition_timeout => 5
     }.

%% Applies Fun to Args to obtain a property, checks it with
%% proper:counterexample/2 for NumTests runs without shrinking, and asserts
%% that the result is `true'. PropEr output is forwarded to ct:pal at low
%% importance, except the "." progress format which is dropped.
run_proper(Fun, Args, NumTests) ->
    ?assertEqual(
       true,
       proper:counterexample(erlang:apply(Fun, Args),
                             [{numtests, NumTests},
                              noshrink,
                              {on_output, fun(".", _) -> ok; % don't print the '.'s on new lines
                                             (F, A) -> ct:pal(?LOW_IMPORTANCE, F, A) end}])).