# To uncomment if run separately
include("setup.jl");Experiment 1, session 2, cleaning the data
Cleans the raw data file and lays out the assumptions behind the cleaning.
Reading the Data
Time Spent
Read the table where the time spent on each page is kept. Then apply the function treatment to each subject.
Because of how we programmed the experiment, page 6 does not exist in the data and is therefore skipped. The epoch_time_completed for the page index 0 is exactly the same as the one used in participant_time_started_utc from the data. So we do not need to keep it here.
# Pages run from 0 to `n_pages`; page 6 does not exist in the data and is left out.
n_pages = 11;
pages = setdiff(0:n_pages, 6);

# Table where the time spent on each page is kept.
timespent = CSV.read(
    joinpath("..", "Data", "OriginalData", "PageTimes-2021-07-07.csv"),
    DataFrame;
    normalizenames = true,
)

# Apply `treatment` to the rows of every participant.
timedata = combine(
    groupby(timespent, :participant_code),
    subject -> treatment(subject, pages = pages),
);

# Pass every duration column (per page, then the total) through `missingtime`, in place.
for column in vcat([Symbol("time_page_$i") for i in 1:(length(pages) - 1)], :time_in_experiment)
    transform!(timedata, column => ByRow(missingtime) => column)
end;

# Raw Data
Read the raw data (after the anonymization)
# Raw data of the experiment (after the anonymization).
rawdata = CSV.read(
    joinpath("..", "Data", "OriginalData", "Experiment1_BRawData.csv"),
    DataFrame;
    normalizenames = true,
);

# Feedback table; the accepted spellings of the boolean values are listed explicitly.
custom_data = CSV.read(
    joinpath("..", "Data", "OriginalData", "Feedback_2021-07-07.csv"),
    DataFrame;
    normalizenames = true,
    truestrings = ["true", "True", "1"],
    falsestrings = ["false", "False", "0"],
    pool = false,
);

# Aggregate All the Data
# Only participants present in all three tables survive the two inner joins.
data = innerjoin(
    innerjoin(rawdata, timedata; on = :participant_code),
    custom_data;
    on = [:participant_code, :session_code],
);

# Cleaning the Data
Finishing the Experiments
Removed subjects who have not finished the experiment
Then keep only subjects who have finished the experiment, and remove the finished column as a consequence.
# Keep the rows whose current page is "Results"; a missing page name counts as not
# finished. The `finished` column is dropped afterwards.
data = data[isequal.(data[!, :participant_current_page_name], "Results"), :];
select!(data, Not(:finished));

# Selecting a subset of columns
We keep only the columns listed in the following `relevantcolumns` variable.
# Drop bookkeeping columns by name. These selectors are regular expressions, not globs:
# a trailing `*` only quantifies the last letter (`r"group*"` also matches "grou"), so
# the plain names are used.
select!(data, Not(r"player_role$"))
select!(data, Not(r"group"))
select!(data, Not(r"payoff")) # Must check that it does not ruin the payments
select!(data, Not(r"Mechanism"));

# Columns named `<App>_<round>_<model>_<column>` are shortened to `<column>`, except:
#   * `round_number` columns of every app other than Beliefs are dropped;
#   * for Beliefs, `criteria` is dropped, `belief` becomes `belief_<round>`, and every
#     other column is left untouched.
# NOTE(review): every column containing "Mechanism" was removed just above, so the
# "Mechanism" entry of `apps` can presumably never match — confirm before removing it.
apps = ["Algorithms", "Beliefs", "Mechanism", "Feedback", "Questionnaire"]
models = ["player", "subsession"]
# `names(data)` is evaluated once, so dropping/renaming columns inside the loop does not
# disturb the iteration.
for col in names(data)
    for app in apps, model in models
        m = match(Regex("$(app)_(?<round>\\d)_$(model)_(?<column>\\w+)"), col)
        isnothing(m) && continue
        if app == "Beliefs"
            if m[:column] == "criteria"
                select!(data, Not(col))
            elseif m[:column] == "belief"
                rename!(data, col => Symbol(m[:column], "_", m[:round]))
            end
        elseif m[:column] == "round_number"
            select!(data, Not(col))
        else
            rename!(data, col => Symbol(m[:column]))
        end
    end
end

# Keeping relevant columns
# The order below is the column order of the cleaned table.
relevantcolumns = vcat(
    :participant_code,
    :participant_time_started_utc,
    [Symbol("criteria_choice$i") for i in 1:5],
    [Symbol("lottery_choice$i") for i in 1:5],
    [:age, :gender, :employment, :region],
    [:urn, :colour, :colour_drawn, :urn_winner],
    [:reasons, :unique_id],
    :time_in_experiment,
    [Symbol("time_page_$i") for i in 1:10],
    [:criteria, :criteria_first, :control],
    [:criteria_choices, :lottery_choices, :arrival_code],
    [:belief_1, :belief_2],
    [:mechanism, :best_mechanism],
)
select!(data, relevantcolumns);

# Transform Columns Types
Transform some string columns with only two possibilities into boolean columns.
# `:criteria_first` should be a boolean column.
data[!, :criteria_first] = Bool.(data[!, :criteria_first]);

# Transform the date in a proper date.
# The last three characters of every timestamp are cut off before parsing
# (presumably sub-millisecond digits — TODO confirm against the raw file).
dformat = DateFormat("y-m-d H:M:S.s")
data[!, :participant_time_started_utc] = map(
    stamp -> DateTime(stamp[1:(end - 3)], dformat),
    data[!, :participant_time_started_utc],
);

# Parsing choices into their proper formats
# Parsing the choices
data[!, :criteria_choices] = map(criteriachoices, data[!, :criteria_choices]);

# Normalise the arrival code, then spread its first five characters over five integer
# columns `arrival_code1` … `arrival_code5`.
transform!(data, :arrival_code => ByRow(arrivalcode) => :arrival_code)
for position in 1:5
    transform!(
        data,
        :arrival_code =>
            ByRow(code -> parse(Int, string(code)[position])) =>
                Symbol("arrival_code$position"),
    )
end

# Transform the lottery choices, stored as text such as "[True, False, ...]", into a
# vector of five 0/1 integers.
transform!(
    data,
    :lottery_choices =>
        ByRow(raw -> Int.(split(strip(raw, ['[', ']']), ", ") .== fill("True", 5))) =>
            :lottery_choices,
);

## Assigning the beliefs to the criteria or the lottery.
# `belief_1` and `belief_2` are attributed according to `:criteria_first`: when it is
# true, `belief_1` is the criteria belief and `belief_2` the lottery belief; otherwise the
# roles are swapped. A belief is recorded as `true` when its value equals 1.
data[!, :criteria_belief] = falses(size(data, 1))
data[!, :lottery_belief] = falses(size(data, 1))
let criteriafirst = data[!, :criteria_first]
    for (rows, criteriasource, lotterysource) in (
        (criteriafirst, :belief_1, :belief_2),
        (.!criteriafirst, :belief_2, :belief_1),
    )
        data[rows, :criteria_belief] = data[rows, criteriasource] .== 1
        data[rows, :lottery_belief] = data[rows, lotterysource] .== 1
    end
end
select!(data, Not([:belief_1, :belief_2]));

# Columns coded with 1 for "true" are turned into booleans.
booleancolumns = vcat([:urn_winner, :control], [Symbol("lottery_choice$i") for i in 1:5])
for col in booleancolumns
    data[!, col] = data[!, col] .== 1
end

# Transform the mechanism column into a boolean: `false` for "DC-5 Lottery", `true` otherwise.
# `:mechanism` and `:best_mechanism` become booleans: `false` for "DC-5 Lottery", `true`
# for any other string. For `:best_mechanism` this tells whether the non-lottery was
# believed better than the lottery or not.
for column in (:mechanism, :best_mechanism)
    data[!, column] = data[!, column] .!= "DC-5 Lottery"
end

# Transform the colours of the balls in the ambiguity choice into boolean values.
# The colour black is transformed into 1, every other colour (red) into 0.
data[!, :colour] = data[!, :colour] .== "Black";

# Consider that everyone that chose the left urn is ambiguity averse. This is a debatable
# assumption, but the best we can do with the available data. The difference in the number
# of ambiguity averse / ambiguity loving subjects is correct if ambiguity neutral subjects
# randomly choose between the right and left urns.
Remove then the :urn column that encodes exactly the same information.
# Choosing "Urn Left" is recorded as ambiguity aversion; `:urn` is then redundant.
data[!, :ambiguity_averse] = data[!, :urn] .== "Urn Left";
select!(data, Not(:urn));

# Creating gender dummies
for (dummy, label) in (:female => "Female", :male => "Male", :other => "Other")
    data[!, dummy] = data[!, :gender] .== label
end
select!(data, Not(:gender));

# Create a variable characterizing the six different combination of treatments that are possible.
# Every row starts with the "RPS, lottery control" label, which therefore also remains
# the label of any row matching none of the combinations below.
data[!, :treatment] .= "RPS Winner, No Control, Lottery, Control"
for (criterion, prefix) in (
        "Rock, Paper, Scissors" => "RPS Winner, No Control",
        "Time" => "Time, Control",
        "Guessing the Paintings" => "Paintings, No Control",
    ),
    (flag, suffix) in ((true, "Control"), (false, "No Control"))

    rows = (data[!, :criteria] .== criterion) .& (data[!, :control] .== flag)
    data[rows, :treatment] .= "$prefix, Lottery, $suffix"
end;

# Create dummys `:criteria_control` for criteria with and without control, rename the `:control` for lottery as `:lottery_control`
# `:control` describes the lottery, hence the rename; `:criteria_control` is true only
# for the "Time" criteria.
rename!(data, :control => :lottery_control)
data[!, :criteria_control] .= false
data[data[!, :criteria] .== "Time", :criteria_control] .= true;

# Unify the definition of a country and remove the previous column `:region` that
# encoded the same data.
# Assumes that most participants are from the USA. In particular, understand Georgia as
# being the USA state rather than the country.
#
# Ordered (pattern => country) table: the first pattern found in the free-text region
# wins. Several patterns are deliberately narrower than a plain substring so that US
# places are not mistaken for foreign countries:
#   * `fran(?!cisco|k)`   — not "San Francisco", "Frankfort", "Franklin";
#   * `india(?!na)`       — not "Indiana" / "Indianapolis";
#   * `\buk\b`            — not the "uk" inside "Milwaukee";
#   * `(?<!new )england`  — not "New England";
#   * `ital(?:y|ia)`      — an alternation; `[y|ia]` was a character class.
country_patterns = [
    r"ukraine"i => "Ukraine",
    r"germany"i => "Germany",
    r"fran(?!cisco|k)"i => "France",
    r"india(?!na)|kolkata|tamil"i => "India",
    r"ital(?:y|ia)"i => "Italy",
    r"Bra[zs]il"i => "Brazil",
    r"\buk\b|united kingdom|(?<!new )england"i => "United Kingdom",
    r"canada"i => "Canada",
    r"portugal"i => "Portugal",
    r"sweden"i => "Sweden",
    r"spain"i => "Spain",
    r"bulgaria"i => "Bulgaria",
    # NOTE(review): "EUA" is also the Portuguese/Spanish abbreviation of the United
    # States — confirm against the raw answer that "UAE" is really meant.
    r"Eua"i => "UAE",
    r"thailand"i => "Thailand",
    r"turkey"i => "Turkey",
    r"netherlands"i => "The Netherlands",
    r"venezuela"i => "Venezuela",
    r"asian"i => "Asian",
]

"""
    countryfromregion(region, patterns = country_patterns)

Return the country paired with the first pattern of `patterns` that occurs in `region`,
or `"USA"` when no pattern occurs.
"""
function countryfromregion(region, patterns = country_patterns)
    for (pattern, country) in patterns
        occursin(pattern, region) && return country
    end
    return "USA"
end

data[!, :country] = countryfromregion.(data[!, :region])
select!(data, Not(:region));

# Computing the Payments
include("PaymentFunctions.jl")create_groups
# Draw at random, for every subject, which of the three predictions is paid.
# NOTE(review): no seed is set here — the draw is reproducible only if the random
# generator is seeded elsewhere; confirm.
data[!, :prediction_payed] = rand([:lottery, :criteria, :best_mechanism], size(data, 1))

# Filled in later, once the winners are known.
data[!, :best_mechanism_winner] = missings(Bool, size(data, 1))

# Starting payment: the 2021 participation fee, plus the low reward for urn winners.
data[!, :payment] = participation_fee[2021] .+ low_reward .* data[!, :urn_winner]

# 569-element Vector{Float64}:
0.8
1.0
1.0
0.8
1.0
0.8
1.0
0.8
1.0
1.0
1.0
0.8
1.0
⋮
1.0
1.0
1.0
1.0
1.0
0.8
1.0
0.8
1.0
0.8
0.8
0.8
# Placeholder columns for both mechanisms: winner flag, rank, score and belief winner.
for name in ("lottery", "criteria")
    data[!, Symbol(name, "_winner")] = missings(Bool, size(data, 1))
    data[!, Symbol(name, "_ranks")] = zeros(Int, size(data, 1))
    data[!, Symbol(name, "_score")] = missings(Int, size(data, 1))
    data[!, Symbol(name, "_belief_winner")] = missings(Bool, size(data, 1))
end

# Now working only on the split data.
sepdata = groupby(data, [:criteria, :lottery_control]);

# Within each (criteria, lottery control) group: rank the subjects under both mechanisms,
# find which mechanism the majority chose, settle the belief winners, and add the
# rewards to `:payment`. The order of the helper calls is kept as is.
for key in keys(sepdata)
    group = sepdata[key]
    criterion = key[:criteria]
    println("Criteria: ", criterion, ", Criteria Control: ", group[1, :criteria_control], ", Lottery Control: ", key[:lottery_control])

    winner(group, x -> lotteryrank(x, dc5), "lottery")

    # The criteria attributes the reward when strictly more than half of the group chose it.
    criteria_chosen = mean(group[!, :mechanism]) > 0.5
    chosenmechanism = criteria_chosen ? criterion : "DC-5 Lottery"
    println("Mechanism chosen to attribute the reward: ", chosenmechanism)

    if criterion == "Time"
        winner(group, arrivaltimeranks, "criteria")
    elseif criterion == "Rock, Paper, Scissors"
        winner(group, totalrpsranks, "criteria")
    elseif criterion == "Guessing the Paintings"
        winner(group, x -> paintingrank(x, elias), "criteria")
    end

    comparativebeliefwinner(group)
    beliefwinner(group, "criteria")
    beliefwinner(group, "lottery")

    # The winner column that pays the high reward is the same for the whole group.
    rewardcolumn = Symbol(criteria_chosen ? "criteria" : "lottery", "_winner")
    for row in eachrow(group)
        paid = row[:prediction_payed]
        # Exactly one of the three comparisons is true: only the drawn prediction pays.
        predictionbonus =
            row[:criteria_belief_winner] * (paid == :criteria) +
            (paid == :best_mechanism) * row[:best_mechanism_winner] +
            (paid == :lottery) * row[:lottery_belief_winner]
        row[:payment] += high_reward[2021] * row[rewardcolumn] + low_reward * predictionbonus
    end
end
data[!, :payment] = round.(data[!, :payment], digits = 2);

# Criteria: Rock, Paper, Scissors, Criteria Control: false, Lottery Control: true
Remaining winners to attribute: 6
Remaining subjects whose winning status has not been characterized: 14
Entering tie-breaking in for mechanism lottery
Mechanism chosen to attribute the reward: Rock, Paper, Scissors
Remaining winners to attribute: 1
Remaining subjects whose winning status has not been characterized: 3
Entering tie-breaking in for mechanism criteria
Criteria: Time, Criteria Control: true, Lottery Control: true
Remaining winners to attribute: 2
Remaining subjects whose winning status has not been characterized: 6
Entering tie-breaking in for mechanism lottery
Mechanism chosen to attribute the reward: DC-5 Lottery
Remaining winners to attribute: 1
Remaining subjects whose winning status has not been characterized: 2
Entering tie-breaking in for mechanism criteria
There are more winner than normally should be the case. It may be a normal behavior.
Criteria: Time, Criteria Control: true, Lottery Control: false
Remaining winners to attribute: 1
Remaining subjects whose winning status has not been characterized: 2
Entering tie-breaking in for mechanism lottery
Mechanism chosen to attribute the reward: DC-5 Lottery
Remaining winners to attribute: 0
Remaining subjects whose winning status has not been characterized: 0
Criteria: Guessing the Paintings, Criteria Control: false, Lottery Control: true
Remaining winners to attribute: 2
Remaining subjects whose winning status has not been characterized: 10
Entering tie-breaking in for mechanism lottery
There are more winner than normally should be the case. It may be a normal behavior.
Mechanism chosen to attribute the reward: Guessing the Paintings
Remaining winners to attribute: 4
Remaining subjects whose winning status has not been characterized: 7
Entering tie-breaking in for mechanism criteria
Criteria: Rock, Paper, Scissors, Criteria Control: false, Lottery Control: false
Remaining winners to attribute: 2
Remaining subjects whose winning status has not been characterized: 8
Entering tie-breaking in for mechanism lottery
Mechanism chosen to attribute the reward: Rock, Paper, Scissors
Remaining winners to attribute: 1
Remaining subjects whose winning status has not been characterized: 2
Entering tie-breaking in for mechanism criteria
Criteria: Guessing the Paintings, Criteria Control: false, Lottery Control: false
Remaining winners to attribute: 1
Remaining subjects whose winning status has not been characterized: 4
Entering tie-breaking in for mechanism lottery
There are more winner than normally should be the case. It may be a normal behavior.
Mechanism chosen to attribute the reward: Guessing the Paintings
Remaining winners to attribute: 1
Remaining subjects whose winning status has not been characterized: 2
Entering tie-breaking in for mechanism criteria
treatments = unique(data[!, :treatment])
# 6-element Vector{String}:
#  "RPS Winner, No Control, Lottery, Control"
#  "Time, Control, Lottery, Control"
#  "Time, Control, Lottery, No Control"
#  "Paintings, No Control, Lottery, Control"
#  "RPS Winner, No Control, Lottery, No Control"
#  "Paintings, No Control, Lottery, No Control"

# Transforms the ranks in percentages in order to relate them to each other: every rank
# is divided by the number of subjects sharing the same treatment.
for rankcolumn in (:criteria_ranks, :lottery_ranks)
    data[!, rankcolumn] = Float64.(data[!, rankcolumn])
    for t in treatments
        intreatment = data[!, :treatment] .== t
        data[intreatment, rankcolumn] ./= sum(intreatment)
    end
end
Comments Encoding
We have encoded the comments according to three dummy variables:
`probability`, `preference`, and `error`. `probability` means that we read in the comment that higher probabilities of winning are what drives the choice of one mechanism over another (even if the belief/understanding and subsequent choice may not reflect that). `preference` means that we read in the comment an intrinsic preference for one or the other mechanism. `error` means that the comments made by the participant showed some misunderstanding of the experiment.