# To uncomment if run separately, keep commented if in the script
include("setup.jl");Experiment 2, cleaning the data
Clean the data for treatment and lay out the assumptions behind the cleaning.
Reading the Data
Time Spent
Read the table where the time spent on each page is kept. Then apply the function treatment to each subject.
Because of how we programmed the experiment, page 6 does not exist in the data and is therefore skipped. The epoch_time_completed for the page index 0 is exactly the same as the one used in time_started_utc from the data. So we do not need to keep it here.
# Not referenced again in the visible part of this script.
n_pages = 11;
# Page indices used to build the per-page durations: 0 through 6 and 8 through 10.
pages = vcat(0:6, 8:10);
timespent = CSV.read(joinpath("..", "Data", "OriginalData", "PageTimes-2023-08-15.csv"), DataFrame; normalizenames = true)
# One result per participant: `treatment` receives that participant's page records.
timedata = combine(groupby(timespent, :participant_code)) do subject
    treatment(subject, pages = pages)
end;
# Pass every `time_page_<i>` column through `missingtime`, row by row, in place.
for i in 1:(length(pages) - 1)
    pagecol = Symbol("time_page_$i")
    transform!(timedata, pagecol => ByRow(missingtime) => pagecol)
end
Raw Data
Read the raw data (after the anonymization)
# Load the anonymized raw export; `normalizenames` makes the headers valid Julia identifiers.
rawdata = CSV.read(joinpath("..", "Data", "OriginalData", "Experiment2RawData.csv"), DataFrame, normalizenames=true);
Aggregate All the Data
# Inner join: only participants present in both the raw data and the page-time table are kept.
data = innerjoin(rawdata, timedata, on = :participant_code);
Cleaning the Data
Finishing the Experiments
Removed subjects who have not finished the experiment
Then keep only subjects who have finished the experiment, and remove the finished column as a consequence.
# A subject counts as finished when their current page is "Results";
# a missing page name counts as not finished.
data = let page = data[!, :participant_current_page_name]
    reached_results = isequal.(page, "Results")
    data[reached_results, :]
end;
select!(data, Not(:finished));
Selecting a subset of columns
We keep only the columns listed in the relevantcolumns variable defined below.
# Drop bookkeeping columns. `Not(r"...")` removes every column whose name contains a match
# of the regular expression. The patterns used to end in a glob-style `*`; in a regex that
# only makes the preceding letter optional (e.g. `group*` also matches "grou"), so it has
# been removed.
select!(data, Not(r"player_role$"))
select!(data, Not(r"group"))
select!(data, Not(r"payoff"))
# Keep the `wta` answer under a short name before the other Mechanism columns are dropped.
rename!(data, :Mechanism_1_player_wta => :wta)
select!(data, Not(r"Mechanism"));
apps = ["Algorithms", "Beliefs", "Mechanism", "Feedback", "Questionnaire"]
models = ["player", "subsession"]
# Strip the `<app>_<round>_<model>_` prefix from the remaining columns.
# `names(data)` returns a fresh vector, so `data` can be modified while looping over it.
for col in names(data)
    for app in apps, model in models
        m = match(Regex("$app") * r"_(?<round>\d)_" * Regex("$model") * r"_(?<column>\w+)", col)
        m === nothing && continue
        #println(m)
        if app != "Beliefs"
            if m[:column] == "round_number"
                # Round numbers are only kept for the Beliefs app.
                select!(data, Not(col))
            else
                rename!(data, col => Symbol(m[:column]))
            end
        elseif m[:column] == "criteria"
            select!(data, Not(col))
        elseif m[:column] == "belief"
            # Beliefs are asked in several rounds: keep the round in the name.
            rename!(data, col => Symbol(m[:column] * "_" * m[:round]))
        end
    end
    # Participant-level columns lose their `participant_` prefix.
    m = match(r"participant_(?<column>\w+)", col)
    if m !== nothing
        rename!(data, col => m[:column])
    end
end
In the Prolific data, we have a variable that takes into account the whole time taken for the experiment, which is a more reliable measure than what we have from oTree, because it also takes into account the time spent at the end of the experiment. We therefore replace the time_in_experiment variable with the Time_taken one.
# Overwrite the oTree duration with the Prolific `Time_taken` column.
data.time_in_experiment = data.Time_taken;
# To be consistent with the AMT data, we rename `Country_of_residence` to `country`.
rename!(data, :Country_of_residence => :country);
Keeping relevant columns
# Columns retained for the analysis, in output order. The numbered families
# (`criteria_choice1..5`, `lottery_choice1..5`, `time_page_1..9`) are generated
# instead of being spelled out one by one.
relevantcolumns = [
    :code,
    :time_started_utc,
    Symbol.("criteria_choice", 1:5)...,
    Symbol.("lottery_choice", 1:5)...,
    :Age,
    :Sex,
    :Employment_status,
    :Student_status,
    :Ethnicity_simplified,
    :country,
    :time_in_experiment,
    Symbol.("time_page_", 1:9)...,
    :criteria,
    :criteria_first,
    :criteria_control,
    :lottery_control,
    :criteria_choices,
    :lottery_choices,
    :arrival_code,
    :belief_1,
    :belief_2,
    :mechanism,
    :best_mechanism,
    :number,
    :wta,
]
41-element Vector{Symbol}:
:code
:time_started_utc
:criteria_choice1
:criteria_choice2
:criteria_choice3
:criteria_choice4
:criteria_choice5
:lottery_choice1
:lottery_choice2
:lottery_choice3
:lottery_choice4
:lottery_choice5
:Age
⋮
:criteria_first
:criteria_control
:lottery_control
:criteria_choices
:lottery_choices
:arrival_code
:belief_1
:belief_2
:mechanism
:best_mechanism
:number
:wta
# Restrict the table to the columns listed in `relevantcolumns`, in that order.
select!(data, relevantcolumns);
Transform Columns Types
Transform some string columns with only two possibilities into boolean columns.
# Convert `criteria_first` into a proper Bool column.
data[!, :criteria_first] = Bool.(data[!, :criteria_first]);
# Turn the start timestamps into `DateTime` values.
dformat = DateFormat("y-m-dTH:M:S.s")
# The last character of every timestamp is dropped before parsing
# (presumably a trailing "Z" — TODO confirm against the raw data).
data[!, :time_started_utc] = [DateTime(stamp[1:end-1], dformat) for stamp in data[!, :time_started_utc]];
Parsing choices into their proper formats
# Parsing the choices
data[!, :criteria_choices] = criteriachoices.(data[!, :criteria_choices]);
# Split the arrival code into one integer column per character position (1 to 5).
# NOTE(review): if `arrival_code` is read as an integer, codes with leading zeros would
# have fewer than 5 characters here — confirm the column is read as text.
for i in 1:5
    data[!, Symbol("arrival_code$i")] = [parse(Int, string(code)[i]) for code in data[!, :arrival_code]]
end
# Transform the lottery choices from their "[True, False, ...]" text form into a
# vector of five 0/1 integers (1 where the entry is "True").
data[!, :lottery_choices] = [
    Int.(split(strip(raw, [']', '[']), ", ") .== fill("True", 5)) for raw in data[!, :lottery_choices]
];
## Assigning the beliefs to the criteria or the lottery.
# `belief_1` answers the first belief question and `belief_2` the second. Which of them is
# about the criteria depends on `criteria_first`, so the answers are re-assigned accordingly.
let critfirst = data[!, :criteria_first], lottfirst = .!data[!, :criteria_first]
    for (target, if_critfirst, if_lottfirst) in (
        (:criteria_belief, :belief_1, :belief_2),
        (:lottery_belief, :belief_2, :belief_1),
    )
        data[!, target] = falses(size(data, 1))
        data[critfirst, target] = data[critfirst, if_critfirst] .== 1
        data[lottfirst, target] = data[lottfirst, if_lottfirst] .== 1
    end
end
select!(data, Not([:belief_1, :belief_2]));
# Columns stored as 0/1 that should be Bool.
booleancolumns = vcat([:lottery_control, :criteria_control], Symbol.("lottery_choice", 1:5))
for col in booleancolumns
    data[!, col] = data[!, col] .== 1
end
Transform the mechanism column into a boolean: false if the DC-5 Lottery was chosen, true otherwise.
# `mechanism` becomes true when the answer is anything but "DC-5 Lottery".
# `best_mechanism` gets the same encoding: a boolean telling whether the non-lottery
# was believed better than the lottery or not.
for col in (:mechanism, :best_mechanism)
    data[!, col] = data[!, col] .!= "DC-5 Lottery"
end
# Transforming the answer about gender into dummies
for (dummy, answer) in (:female => "Female", :male => "Male", :other => "Other")
    data[!, dummy] = data[!, :Sex] .== answer
end
select!(data, Not(:Sex));
Create a variable characterizing the six different combinations of treatments that are possible.
# Every row starts with the RPS label; rows with criteria control and no lottery control
# are then labelled by their criteria. The RPS assignment repeats the default label.
data.treatment .= "RPS Winner, Control, Lottery, No Control"
let criteria_only_control = data[!, :criteria_control] .& .!data[!, :lottery_control]
    for (criterion, label) in (
        "Rock, Paper, Scissors" => "RPS Winner, Control, Lottery, No Control",
        "Time" => "Time, Control, Lottery, No Control",
    )
        data[(data[!, :criteria] .== criterion) .& criteria_only_control, :treatment] .= label
    end
end;
Computing the Payments
include("PaymentFunctions.jl")create_groups
# Computing belief payments
selected_belief_question = dc4_2023 % 3 + 1
# Default, kept when the selected question is neither 1 nor 2.
data.prediction_payed .= :best_mechanism
let askedfirst = data[!, :criteria_first]
    if selected_belief_question == 1 || selected_belief_question == 2
        # Question 1 is paid as the criteria prediction for subjects with `criteria_first`
        # and as the lottery prediction for the others; question 2 is the reverse.
        oncriteria = selected_belief_question == 1 ? askedfirst : .!askedfirst
        data[oncriteria, :prediction_payed] .= :criteria
        data[.!oncriteria, :prediction_payed] .= :lottery
    end
end
285-element view(::Vector{Symbol}, [2, 5, 6, 9, 10, 13, 14, 17, 18, 20 … 556, 559, 560, 563, 566, 567, 570, 573, 574, 577]) with eltype Symbol:
:lottery
:lottery
:lottery
:lottery
:lottery
:lottery
:lottery
:lottery
:lottery
:lottery
:lottery
:lottery
:lottery
⋮
:lottery
:lottery
:lottery
:lottery
:lottery
:lottery
:lottery
:lottery
:lottery
:lottery
:lottery
:lottery
# Every participant starts from the participation fee.
# NOTE(review): the fee is looked up under 2021 while the reward further down is looked up
# under 2023 — confirm this is the intended entry.
data[!, :payment] .= participation_fee[2021]
# NOTE(review): `bonus` is initialised to zero here and not updated in the visible part of
# the script.
data[!, :bonus] .= 0.
577-element Vector{Float64}:
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
⋮
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
# Create the result columns for both mechanisms: winner flag, rank, score and
# belief-winner flag. Ranks start at 0; the other columns start as missing.
let nrows = size(data, 1)
    for name in ("lottery", "criteria")
        data[!, Symbol(name, "_winner")] = missings(Bool, nrows)
        data[!, Symbol(name, "_ranks")] = zeros(Int, nrows)
        data[!, Symbol(name, "_score")] = missings(Int, nrows)
        data[!, Symbol(name, "_belief_winner")] = missings(Bool, nrows)
    end
end
Now working only on the split data.
# Separating the participants in different groups by number.
transform!(groupby(data, :criteria), x -> create_groups(x, :number));
# Separating the data by criteria type and group.
sepdata = groupby(data, [:criteria, :group])
# For every (criteria, group) pair: rank the members under both mechanisms, find which
# mechanism allocates the reward, and add the rewards to each member's payment.
for key in keys(sepdata)
    group = sepdata[key]
    println("Criteria: ", key[:criteria], ", Group: ", key[:group])
    winner(group, x -> lotteryrank(x, dc5_2023), "lottery")

    # The participant holding the drawn number decides the mechanism. If nobody in the
    # group holds it, try the next number, wrapping from 99 back to 0.
    used_number = dc2_2023
    decider = group[!, :number] .== used_number
    tries = 0
    while !any(decider)
        tries += 1
        # Guard against an endless search when no number in the group can ever match.
        tries > 100 && error("no participant number matches the draw in group $(key[:group])")
        used_number = used_number == 99 ? 0 : used_number + 1
        decider = group[!, :number] .== used_number
    end
    # `mechanism` was converted to Bool earlier in the script: false is the DC-5 Lottery,
    # true is the other mechanism. It must not be compared with the string "DC-5 Lottery".
    chosenmechanism = group[decider, :mechanism][1]

    if parse(Int, string(dc3_2023)[1]) > 4 # If the number drawn is 5 or above, the second question is used for payment.
        # NOTE(review): this branch could not run as originally written. It contained a bare
        # comparison against an undefined `wta`, a non-broadcast `+=` and `==`, and string
        # comparisons against the Bool `chosenmechanism`. The version below assumes the
        # intent was: when the drawn threshold exceeds the deciding participant's WTA,
        # that participant is paid the threshold and the other mechanism is used.
        # Confirm against the experimental protocol.
        chosen_wta = group[decider, :wta][1]
        if drawn_threshold > chosen_wta
            group[decider, :payment] .+= drawn_threshold
            chosenmechanism = !chosenmechanism
        end
    end
    println("Mechanism chosen to attribute the reward: ", chosenmechanism)

    expe_number = 1
    if key[:criteria] == "Time"
        winner(group, arrivaltimeranks, "criteria")
        expe_number = 2
    elseif key[:criteria] == "Rock, Paper, Scissors"
        winner(group, totalrpsranks, "criteria")
    elseif key[:criteria] == "Guessing the Paintings"
        winner(group, x -> paintingrank(x, elias3), "criteria")
    end
    beliefwinner(group, "criteria")
    beliefwinner(group, "lottery")

    # Prefix of the `<prefix>_winner` column that decides who receives the high reward.
    reward_mechanism = chosenmechanism ? "criteria" : "lottery"
    for row in eachrow(group)
        # High reward for the winner under the chosen mechanism, plus the low reward when
        # the prediction selected for payment was a winning one.
        row[:payment] += high_reward[2023] * row[Symbol("$(reward_mechanism)_winner")] +
            low_reward * (row[:criteria_belief_winner] * (row[:prediction_payed] == :criteria) +
                          (row[:prediction_payed] == :lottery) * row[:lottery_belief_winner])
    end
    # Round the payments to two decimals.
    group.payment .= round.(group.payment, digits = 2)
    group.expe .= expe_number
end
Criteria: Time, Group: 1
Remaining winners to attribute: 1
Remaining subjects whose winning status has not been characterized: 1
Mechanism chosen to attribute the reward: true
Remaining winners to attribute: 1
Remaining subjects whose winning status has not been characterized: 2
Entering tie-breaking in for mechanism criteria
There are more winner than normally should be the case. It may be a normal behavior.
Criteria: Time, Group: 2
Remaining winners to attribute: 1
Remaining subjects whose winning status has not been characterized: 1
Mechanism chosen to attribute the reward: false
Remaining winners to attribute: 1
Remaining subjects whose winning status has not been characterized: 2
Entering tie-breaking in for mechanism criteria
There are more winner than normally should be the case. It may be a normal behavior.
Criteria: Time, Group: 3
Remaining winners to attribute: 2
Remaining subjects whose winning status has not been characterized: 4
Entering tie-breaking in for mechanism lottery
There are more winner than normally should be the case. It may be a normal behavior.
Mechanism chosen to attribute the reward: true
Remaining winners to attribute: 0
Remaining subjects whose winning status has not been characterized: 0
Criteria: Time, Group: 4
Remaining winners to attribute: 0
Remaining subjects whose winning status has not been characterized: 0
Mechanism chosen to attribute the reward: true
Remaining winners to attribute: 2
Remaining subjects whose winning status has not been characterized: 4
Entering tie-breaking in for mechanism criteria
There are more winner than normally should be the case. It may be a normal behavior.
Criteria: Time, Group: 5
Remaining winners to attribute: 2
Remaining subjects whose winning status has not been characterized: 3
Entering tie-breaking in for mechanism lottery
Mechanism chosen to attribute the reward: false
Remaining winners to attribute: 0
Remaining subjects whose winning status has not been characterized: 0
Criteria: Time, Group: 6
Remaining winners to attribute: 1
Remaining subjects whose winning status has not been characterized: 2
Entering tie-breaking in for mechanism lottery
Mechanism chosen to attribute the reward: false
Remaining winners to attribute: 0
Remaining subjects whose winning status has not been characterized: 0
Criteria: Rock, Paper, Scissors, Group: 1
Remaining winners to attribute: 0
Remaining subjects whose winning status has not been characterized: 0
Mechanism chosen to attribute the reward: true
Remaining winners to attribute: 1
Remaining subjects whose winning status has not been characterized: 2
Entering tie-breaking in for mechanism criteria
Criteria: Rock, Paper, Scissors, Group: 2
Remaining winners to attribute: 4
Remaining subjects whose winning status has not been characterized: 5
Entering tie-breaking in for mechanism lottery
There are more winner than normally should be the case. It may be a normal behavior.
Mechanism chosen to attribute the reward: true
Remaining winners to attribute: 1
Remaining subjects whose winning status has not been characterized: 1
Criteria: Rock, Paper, Scissors, Group: 3
Remaining winners to attribute: 1
Remaining subjects whose winning status has not been characterized: 1
Mechanism chosen to attribute the reward: true
Remaining winners to attribute: 1
Remaining subjects whose winning status has not been characterized: 1
Criteria: Rock, Paper, Scissors, Group: 4
Remaining winners to attribute: 1
Remaining subjects whose winning status has not been characterized: 3
Entering tie-breaking in for mechanism lottery
Mechanism chosen to attribute the reward: true
Remaining winners to attribute: 1
Remaining subjects whose winning status has not been characterized: 1
Criteria: Rock, Paper, Scissors, Group: 5
Remaining winners to attribute: 1
Remaining subjects whose winning status has not been characterized: 1
Mechanism chosen to attribute the reward: true
Remaining winners to attribute: 1
Remaining subjects whose winning status has not been characterized: 4
Entering tie-breaking in for mechanism criteria
Criteria: Rock, Paper, Scissors, Group: 6
Remaining winners to attribute: 0
Remaining subjects whose winning status has not been characterized: 0
Mechanism chosen to attribute the reward: true
Remaining winners to attribute: 1
Remaining subjects whose winning status has not been characterized: 1
# Distinct treatment labels present in the data, in order of first appearance.
treatments = unique(data[!, :treatment])
2-element Vector{String}:
"Time, Control, Lottery, No Control"
"RPS Winner, Control, Lottery, No Control"
# Transforms the ranks in percentages in order to relate them to each other:
# ranks become floats and are divided by the number of subjects in the same treatment.
for col in (:criteria_ranks, :lottery_ranks)
    data[!, col] = Float64.(data[!, col])
end
for t in treatments
    members = data[!, :treatment] .== t
    headcount = sum(members)
    data[members, :criteria_ranks] ./= headcount
    data[members, :lottery_ranks] ./= headcount
end
# Saving the Cleaned Data
# Write the cleaned table as a comma-separated file.
data |> CSV.write(joinpath("..", "Data", "Input", "Experiment2CleanedData.csv"), delim = ',')
"../Data/Input/Experiment2CleanedData.csv"