1: -module(partitions_SUITE).
    2: -compile(export_all).
    3: 
    4: -include_lib("common_test/include/ct.hrl").
    5: -include_lib("proper/include/proper.hrl").
    6: -include_lib("eunit/include/eunit.hrl").
    7: 
    8: all() -> [
    9:           {group, tests}
   10:          ].
   11: 
   12: groups() ->
   13:     Tests = [
   14:              enq_drain_basic
   15:              % prop_enq_drain
   16:             ],
   17:     [{tests, [], Tests}].
   18: 
   19: init_per_group(_, Config) -> Config.
   20: 
   21: end_per_group(_, _Config) -> ok.
   22: 
   23: init_per_testcase(print, Config0) ->
   24:     Nodes = erlang_nodes(5),
   25:     Servers = [{print, N} || N <- Nodes],
   26:     [{cluster_name, print},
   27:      {nodes, Nodes}, {servers, Servers},
   28:      {name, print} | Config0];
   29: init_per_testcase(TestCase, Config0) ->
   30:     Nodes = erlang_nodes(5),
   31:     Servers = [{TestCase, N} || N <- Nodes],
   32:     Config1 = prepare_erlang_cluster(Config0, Nodes),
   33:     Config = [{cluster_name, TestCase},
   34:               {nodes, Nodes}, {servers, Servers},
   35:               {name, TestCase} | Config1],
   36:     Machine = {module, ra_fifo, #{}},
   37:     ServerId = setup_ra_cluster(Config, Machine),
   38:     %% Make sure nodes are synchronised
   39:     ct:pal("Members ~p~n", [ra:members(ServerId)]),
   40:     Config.
   41: 
   42: end_per_testcase(print, Config) ->
   43:     Config;
   44: end_per_testcase(_, Config) ->
   45:     Nodes = ?config(nodes, Config),
   46:     ct:pal("end_per_testcase: Stopping nodes ~p~n", [Nodes]),
   47:     erlang_node_helpers:stop_erlang_nodes(Nodes),
   48:     ct:pal("end_per_testcase: Stopped nodes ~p~n", [Nodes]),
   49:     ok.
   50: 
   51: -type nodes5() :: foo1@localhost |
   52:                   foo2@localhost |
   53:                   foo3@localhost |
   54:                   foo4@localhost |
   55:                   foo5@localhost.
   56: 
   57: -type actions() :: {part, [nodes5()], 500..10000} |
   58:                    {wait, 500..10000}.
   59: 
   60: -type wait_time() :: 500..10000.
   61: 
   62: prop_enq_drain(Config) ->
   63:     ClusterName = ?config(cluster_name, Config),
   64:     Nodes = ?config(nodes, Config),
   65:     Servers = ?config(servers, Config),
   66:     run_proper(
   67:       fun () ->
   68:               ?FORALL(S, resize(
   69:                            10,
   70:                            non_empty(
   71:                              list(
   72:                                oneof([{wait, wait_time()},
   73:                                       heal,
   74:                                       {part, vector(2, nodes5()),
   75:                                        wait_time()}])))),
   76:                       do_enq_drain_scenario(ClusterName,
   77:                                             Nodes, Servers,
   78:                                             [{wait, 5000}] ++ S ++
   79:                                             [heal, {wait, 5000}]))
   80:       end, [], 10).
   81: 
   82: print_scenario(Scenario) ->
   83:     ct:pal("Scenario ~p~n", [Scenario]),
   84:     true.
   85: 
   86: enq_drain_basic(Config) ->
   87:     ClusterName = ?config(cluster_name, Config),
   88:     Nodes = ?config(nodes, Config),
   89:     Servers = ?config(servers, Config),
   90:     Scenario = [{wait, 5000},
   91:                 {part, select_some(Nodes), 5000},
   92:                 {app_restart, select_some(Servers)},
   93:                 {wait, 5000},
   94:                 {part, select_some(Nodes), 20000},
   95:                 {wait, 5000}],
   96:     true = do_enq_drain_scenario(ClusterName, Nodes, Servers, Scenario).
   97: 
   98: do_enq_drain_scenario(ClusterName, Nodes, Servers, Scenario) ->
   99:     ct:pal("Running ~p~n", [Scenario]),
  100:     NemConf = #{nodes => Nodes,
  101:                 scenario => Scenario},
  102:     ScenarioTime = scenario_time(Scenario, 5000),
  103:     {ok, Nem} = nemesis:start_link(NemConf),
  104:     EnqInterval = 1000,
  105:     NumMessages = abs(erlang:trunc((ScenarioTime - 5000) / EnqInterval)),
  106:     EnqConf = #{cluster_name => ClusterName,
  107:                 servers => Servers,
  108:                 num_messages => NumMessages,
  109:                 spec => {EnqInterval, custard}},
  110:     EnqConf2 = #{cluster_name => ClusterName,
  111:                  servers => lists:reverse(Servers),
  112:                  num_messages => NumMessages,
  113:                  spec => {EnqInterval, custard}},
  114:     {ok, Enq} = enqueuer:start_link(enq_one, EnqConf),
  115:     {ok, Enq2} = enqueuer:start_link(enq_two, EnqConf2),
  116:     ct:pal("enqueue_checkout wait_on_scenario ~n", []),
  117:     ok = nemesis:wait_on_scenario(Nem, ScenarioTime * 2),
  118:     {applied, Applied, _} = enqueuer:wait(Enq, ScenarioTime * 2),
  119:     {applied, Applied2, _} = enqueuer:wait(Enq2, ScenarioTime * 2),
  120:     ct:pal("enqueuer:wait ~p ~n", [Applied]),
  121:     ct:pal("enqueuer:wait ~p ~n", [Applied2]),
  122:     proc_lib:stop(Nem),
  123:     proc_lib:stop(Enq),
  124:     validate_machine_state(Servers),
  125:     Received = drain(ClusterName, Servers),
  126:     validate_machine_state(Servers),
  127:     ct:pal("Expected ~p~nApplied ~p~nReceived ~p~nScenario: ~p~n",
  128:            [NumMessages, Applied, Received, Scenario]),
  129:     % assert no messages were lost
  130:     Remaining = (Applied ++ Applied2) -- Received,
  131:     ct:pal("Remaining ~p~n", [Remaining]),
  132:     MaxReceived = lists:max(Received),
  133:     Remaining =:= [] andalso NumMessages =:= MaxReceived.
  134: 
  135: validate_machine_state(Servers) ->
  136:     validate_machine_state(Servers, 10).
  137: 
  138: validate_machine_state(Servers, 0) ->
  139:     MacStates = [begin
  140:                      {ok, S, _} = ra:local_query(N, fun ra_lib:id/1),
  141:                      S
  142:                  end || N <- Servers],
  143:     exit({validate_machine_state_failed, MacStates});
  144: validate_machine_state(Servers, Num) ->
  145:     % give the cluster a bit of time to settle first
  146:     timer:sleep(500),
  147:     MacStates = [begin
  148:                      {ok, {IT, _} = S, _} = ra:local_query(N, fun ra_lib:id/1),
  149:                      ct:pal("validating ~w at ~w", [N, IT]),
  150:                      S
  151:                  end || N <- Servers],
  152:     H = hd(MacStates),
  153:     case lists:all(fun (S) -> H =:= S end, MacStates) of
  154:         true ->
  155:             ct:pal("machine state are valid", []),
  156:             ok;
  157:         false ->
  158:             validate_machine_state(Servers, Num-1)
  159:     end.
  160: 
  161: select_some(Servers) ->
  162:     N = trunc(length(Servers) / 2),
  163:     element(1,
  164:             lists:foldl(fun (_, {Selected, Rem0}) ->
  165:                                 {S, Rem} = random_element(Rem0),
  166:                                 {[S | Selected], Rem}
  167:                         end, {[], Servers}, lists:seq(1, N))).
  168: 
  169: random_element(Nodes) ->
  170:     Selected = lists:nth(rand:uniform(length(Nodes)), Nodes),
  171:     {Selected, lists:delete(Selected, Nodes)}.
  172: 
  173: scenario_time([], Acc) ->
  174:     Acc;
  175: scenario_time([heal | Rest], Acc) ->
  176:     scenario_time(Rest, Acc);
  177: scenario_time([{app_restart, _} | Rest], Acc) ->
  178:     scenario_time(Rest, Acc + 100);
  179: scenario_time([{_, T} | Rest], Acc) ->
  180:     scenario_time(Rest, Acc + T);
  181: scenario_time([{_, _, T} | Rest], Acc) ->
  182:     scenario_time(Rest, Acc + T).
  183: 
  184: 
  185: drain(ClusterName, Nodes) ->
  186:     ct:pal("draining ~w", [ClusterName]),
  187:     F = ra_fifo_client:init(ClusterName, Nodes),
  188:     drain0(F, []).
  189: 
  190: drain0(S0, Acc) ->
  191:     case ra_fifo_client:dequeue(<<"c1">>, settled, S0) of
  192:         {ok, {_, {_, {custard, Msg}}}, S} ->
  193:             drain0(S, [Msg | Acc]);
  194:         {ok, empty, _} ->
  195:             Acc;
  196:         Err ->
  197:             %% oh dear
  198:             ct:pal("drain failed after draining ~w~n with ~w", [Acc, Err]),
  199:             exit(Err)
  200:     end.
  201: 
  202: erlang_nodes(5) ->
  203:     [
  204:      foo1@localhost,
  205:      foo2@localhost,
  206:      foo3@localhost,
  207:      foo4@localhost,
  208:      foo5@localhost
  209:      ].
  210: 
  211: prepare_erlang_cluster(Config, Nodes) ->
  212:     Config0 = tcp_inet_proxy_helpers:configure_dist_proxy(Config),
  213:     erlang_node_helpers:start_erlang_nodes(Nodes, Config0),
  214:     Config0.
  215: 
  216: setup_ra_cluster(Config, Machine) ->
  217:     Nodes = ?config(nodes, Config),
  218:     Name = ?config(name, Config),
  219:     DataDir = data_dir(Config),
  220:     ok = ra_lib:make_dir(DataDir),
  221: 
  222:     Configs = lists:map(
  223:                 fun(Node) ->
  224:                         ct:pal("Start app on ~p~n", [Node]),
  225:                         C = make_server_config(Name, Nodes, Node, Machine),
  226:                         ok = ct_rpc:call(Node, ?MODULE, node_setup, [DataDir]),
  227:                         ok = ct_rpc:call(Node, application, load, [ra]),
  228:                         ok = ct_rpc:call(Node, application, set_env,
  229:                                          [ra, data_dir, [DataDir]]),
  230:                         ok = ct_rpc:call(Node, ra, start, []),
  231: 
  232:                         ok = ct_rpc:call(Node, logger, set_primary_config,
  233:                                          [level, all]),
  234:                         C
  235:                 end,
  236:                 Nodes),
  237:     lists:map(fun(#{id := {_, Node}} = ServerConfig) ->
  238:                       ct:pal("Start ra server on ~p~n", [Node]),
  239:                       ok = ct_rpc:call(Node, ra, start_server, [ServerConfig]),
  240:                       ServerConfig
  241:               end,
  242:               Configs),
  243:     ServerId = {Name, hd(Nodes)},
  244:     ok = ra:trigger_election(ServerId),
  245:     ServerId.
  246: 
  247: node_setup(DataDir) ->
  248:     ok = ra_lib:make_dir(DataDir),
  249:     LogFile = filename:join([DataDir, atom_to_list(node()), "ra.log"]),
  250:     SaslFile = filename:join([DataDir, atom_to_list(node()), "ra_sasl.log"]),
  251:     logger:set_primary_config(level, debug),
  252:     Config = #{config => #{type => {file, LogFile}}, level => debug},
  253:     logger:add_handler(ra_handler, logger_std_h, Config),
  254:     application:load(sasl),
  255:     application:set_env(sasl, sasl_error_logger, {file, SaslFile}),
  256:     application:stop(sasl),
  257:     application:start(sasl),
  258:     _ = error_logger:tty(false),
  259:     ok.
  260: 
  261: data_dir(Config) ->
  262:     Cwd = ?config(priv_dir, Config),
  263:     filename:join(Cwd, "part").
  264: 
  265: make_server_config(Name, Nodes, Node, Machine) ->
  266:     #{cluster_name => Name,
  267:       id => {Name, Node},
  268:       uid => atom_to_binary(Name, utf8),
  269:       initial_members => [{Name, N} || N <- Nodes],
  270:       log_init_args =>
  271:       #{uid => atom_to_binary(Name, utf8)},
  272:       machine =>  Machine,
  273:       await_condition_timeout => 5
  274:      }.
  275: 
  276: run_proper(Fun, Args, NumTests) ->
  277:     ?assertEqual(
  278:        true,
  279:        proper:counterexample(erlang:apply(Fun, Args),
  280: 			     [{numtests, NumTests},
  281:                   noshrink,
  282: 			      {on_output, fun(".", _) -> ok; % don't print the '.'s on new lines
  283: 					     (F, A) -> ct:pal(?LOW_IMPORTANCE, F, A) end}])).