Experiment 1, session 1, cleaning the data

Authors
Affiliations

Elias Bouacida

University Paris 8

Renaud Foucart

Lancaster University

Published

October 7, 2024

Cleans the raw data file and lays out the assumptions behind the cleaning.

# To uncomment if run separately
include("setup.jl");

Reading the Data

Time Spent

Read the table where the time spent on each page is kept. Then apply the function treatment to each subject.

Because of how we programmed the experiment, page 6 does not exist in the data and is therefore skipped. The epoch_time_completed for the page index 0 is exactly the same as the one used in participant_time_started_utc from the data. So we do not need to keep it here.

# Page indices recorded in the page-times file; index 6 is absent from the data.
n_pages = 11;
pages = [0:5; 7:11];
timespent = CSV.read(
    joinpath("..", "Data", "OriginalData", "PageTimes-2021-06-22.csv"),
    DataFrame;
    normalizenames = true,
    stringtype = String,
)
# Apply `treatment` to the rows of each participant.
timedata = combine(groupby(timespent, :participant_code)) do subject
    treatment(subject, pages = pages)
end;
# Pass every per-page duration, then the total duration, through `missingtime`.
for timecol in vcat([Symbol("time_page_$i") for i in 1:(length(pages) - 1)], :time_in_experiment)
    transform!(timedata, timecol => ByRow(x -> missingtime(x)) => timecol)
end

Raw Data

Read the raw data (after the anonymization)

# Load the anonymized raw export of the experiment.
rawdata = CSV.read(
    joinpath("..", "Data", "OriginalData", "Experiment1_ARawData.csv"),
    DataFrame;
    normalizenames = true,
);

Aggregate All the Data

# Attach the per-page timings to each participant's raw record.
data = innerjoin(rawdata, timedata; on = :participant_code);
# Feedback export: booleans may be written as words or as 0/1.
custom_data = CSV.read(
    joinpath("..", "Data", "OriginalData", "Feedback_2021-06-22.csv"),
    DataFrame;
    normalizenames = true,
    truestrings = ["true", "True", "1"],
    falsestrings = ["false", "False", "0"],
    pool = false,
    stringtype = String,
);
# Only participants present in both tables (same participant and session code) are kept.
data = innerjoin(data, custom_data; on = [:participant_code, :session_code]);

Cleaning the Data

Finishing the Experiments

Removed subjects who have not finished the experiment

Keep only the subjects who have finished the experiment, i.e. whose current page is the Results page, and then remove the finished column, which no longer carries any information.

# `isequal` returns false for a missing page name, so rows without a page are dropped
# together with rows that stopped before the "Results" page.
data = data[isequal.(data.participant_current_page_name, "Results"), :];
select!(data, Not(:finished));

Selecting a subset of columns

We keep only the columns listed in the relevantcolumns variable defined below. Before that, we drop bookkeeping columns and shorten the remaining column names.

# Drop bookkeeping columns before shortening the names. These are regular expressions,
# not globs: a trailing `*` would only make the last letter optional/repeatable
# (e.g. `group*` also matches "grou"), so the plain words are used instead.
# The payoff columns are dropped too: must check that it does not ruin the payments.
for pattern in (r"player_role$", r"group", r"payoff", r"Mechanism")
    select!(data, Not(pattern))
end
apps = ["Algorithms", "Beliefs", "Mechanism", "Feedback", "Questionnaire"]
models = ["player", "subsession"]
# One pattern per (app, model) pair, built once instead of once per column.
# Column names have the shape `<app>_<round digit>_<model>_<column>`.
columnpatterns = [
    (app, Regex("$(app)_(?<round>\\d)_$(model)_(?<column>\\w+)")) for app in apps for
    model in models
]
# `names(data)` returns a fresh vector, so `data` can be modified while iterating.
for col in names(data)
    for (app, pattern) in columnpatterns
        m = match(pattern, col)
        isnothing(m) && continue
        if app != "Beliefs" && m[:column] == "round_number"
            # Round numbers are only kept for the Beliefs app.
            select!(data, Not(col))
        elseif app == "Beliefs"
            if m[:column] == "criteria"
                select!(data, Not(col))
            elseif m[:column] == "belief"
                # Beliefs keep their round as a suffix: belief_1, belief_2, ...
                rename!(data, col => Symbol(m[:column] * "_" * m[:round]))
            end
        else
            # Every other column is reduced to its bare column name.
            rename!(data, col => Symbol(m[:column]))
        end
    end
end

Keeping relevant columns

# Columns kept for the analysis, in the order they appear in the cleaned table.
relevantcolumns = vcat(
    [:participant_code, :participant_time_started_utc],
    [Symbol("criteria_choice$i") for i in 1:5],
    [Symbol("lottery_choice$i") for i in 1:5],
    [
        :age,
        :gender,
        :employment,
        :region,
        :urn,
        :colour,
        :colour_drawn,
        :urn_winner,
        :reasons,
        :unique_id,
        :time_in_experiment,
    ],
    [Symbol("time_page_$i") for i in 1:10],
    [
        :criteria,
        :criteria_first,
        :control,
        :rps_winner,
        :criteria_choices,
        :lottery_choices,
        :arrival_code,
        :paintings_order,
        :belief_1,
        :belief_2,
        :mechanism,
        :best_mechanism,
    ],
)
select!(data, relevantcolumns);

Transform Columns Types

Transform some string columns with only two possibilities into boolean columns.

# `criteria_first` is converted into a genuine Bool column.
data.criteria_first = Bool.(data.criteria_first);
# Parse the start timestamp into a DateTime. The last three characters of the raw
# string are dropped before parsing (presumably sub-millisecond digits -- TODO confirm).
dformat = DateFormat("y-m-d H:M:S.s")
data.participant_time_started_utc = [
    DateTime(stamp[1:end-3], dformat) for stamp in data.participant_time_started_utc
];

Parsing choices into their proper formats

# Parse the stringified choice columns with the project helpers.
data.criteria_choices = criteriachoices.(data.criteria_choices);
transform!(data, :arrival_code => ByRow(arrivalcode) => :arrival_code)
# Spread the first five characters of the arrival code over one integer column each.
for position in 1:5
    digitcol = Symbol("arrival_code$position")
    transform!(data, :arrival_code => ByRow(code -> parse(Int, string(code)[position])) => digitcol)
end
# Encode the lottery choices as a vector of 0/1 integers (1 where the entry is "True").
# The entries are compared element-wise against five "True" strings.
transform!(
    data,
    :lottery_choices =>
        ByRow(raw -> Int.(split(strip(raw, ['[', ']']), ", ") .== fill("True", 5))) =>
            :lottery_choices,
);
## Assign the two beliefs to the criteria or the lottery: when `criteria_first` is true,
## belief_1 is the criteria belief and belief_2 the lottery belief; otherwise they swap.
data.criteria_belief = ifelse.(data.criteria_first, data.belief_1, data.belief_2) .== 1
data.lottery_belief = ifelse.(data.criteria_first, data.belief_2, data.belief_1) .== 1
select!(data, Not([:belief_1, :belief_2]));
# Columns stored as 0/1 become booleans (true exactly when the value equals 1).
booleancolumns = vcat([:urn_winner, :control, :rps_winner], [Symbol("lottery_choice$i") for i in 1:5])
for boolcol in booleancolumns
    data[!, boolcol] = data[!, boolcol] .== 1
end

Transform the mechanism column into a boolean. The value is 1 if the criteria-based mechanism (for instance RPS) was chosen, and 0 if the DC-5 lottery was chosen.

# true for any mechanism other than the DC-5 lottery, false for the lottery itself.
data.mechanism = data.mechanism .!= "DC-5 Lottery";
# Same encoding for the mechanism believed to be the best one:
# true when the non-lottery mechanism was believed better than the lottery.
data.best_mechanism = data.best_mechanism .!= "DC-5 Lottery";

Transform the colours of the balls in the ambiguity choice into boolean values: black is encoded as 1 and red as 0.

# true when the colour is "Black", false for any other value.
data.colour = data.colour .== "Black";
# One dummy column per gender answer, then drop the original column.
for (dummy, answer) in (:female => "Female", :male => "Male", :other => "Other")
    data[!, dummy] = data.gender .== answer
end
select!(data, Not(:gender));

Consider that everyone who chose the left urn is ambiguity averse. This is a debatable assumption, but the best we can do with the available data. The difference between the number of ambiguity-averse and ambiguity-loving subjects is correct if ambiguity-neutral subjects choose randomly between the right and left urns.

Then remove the :urn column, which encodes exactly the same information.

# Subjects who picked the left urn are flagged as ambiguity averse.
data.ambiguity_averse = data.urn .== "Urn Left";
select!(data, Not(:urn));
# Create a variable characterizing the combinations of treatments that are possible.
# Every row starts with the default label; the masks below then overwrite it.
let won = data.rps_winner, control = data.control
    isrps = data.criteria .== "Rock, Paper, Scissors"
    istime = data.criteria .== "Arrival Time"
    ispaintings = data.criteria .== "Guessing the Paintings"

    data[!, :treatment] .= "RPS Winner, Control, Lottery, Control"
    data[.!won .& isrps .& control, :treatment] .= "RPS Loser, Control, Lottery, Control"
    data[.!won .& isrps .& .!control, :treatment] .= "RPS Loser, Control, Lottery, No Control"
    data[won .& isrps .& .!control, :treatment] .= "RPS Winner, Control, Lottery, No Control"
    data[istime .& control, :treatment] .= "Time, No Control, Lottery, Control"
    data[istime .& .!control, :treatment] .= "Time, No Control, Lottery, No Control"
    data[ispaintings .& control, :treatment] .= "Paintings, Control, Lottery, Control"
    data[ispaintings .& .!control, :treatment] .= "Paintings, Control, Lottery, No Control"
end;
# `:control` describes the lottery, so it becomes `:lottery_control`; the new dummy
# `:criteria_control` is true for every criteria except "Arrival Time".
rename!(data, :control => :lottery_control)
data[!, :criteria_control] .= true
data[data.criteria .== "Arrival Time", :criteria_control] .= false;

Unify the definition of a country and remove the previous column :region that encoded the same data.
Assumes that most participants are from the USA. In particular, understand Georgia as being the USA state rather than the country.

# Ordered lookup table: the first pattern found in the free-text region decides the
# country. All patterns are case-insensitive. Because most participants are assumed to
# be from the USA, patterns are written so that common US place names do not trigger a
# foreign country:
#   * `fran(?!cisco)`      -- "San Francisco" is not France;
#   * `india(?!na)`        -- "Indiana"/"Indianapolis" are not India;
#   * `\buk\b`             -- "uk" must be a word of its own ("Milwaukee" is not the UK);
#   * `(?<!new )england`   -- "New England" is not the UK;
#   * `ital(?:y|ia)`       -- a real alternation (`[y|ia]` was a one-character class).
countrypatterns = [
    r"ukraine"i => "Ukraine",
    r"germany"i => "Germany",
    r"fran(?!cisco)"i => "France",
    r"india(?!na)|kolkata|tamil"i => "India",
    r"ital(?:y|ia)"i => "Italy",
    r"Bra[zs]il"i => "Brazil",
    r"\buk\b|united kingdom|(?<!new )england"i => "United Kingdom",
    r"canada"i => "Canada",
    r"portugal"i => "Portugal",
    r"sweden"i => "Sweden",
    r"spain"i => "Spain",
    r"bulgaria"i => "Bulgaria",
    # NOTE(review): "EUA" is also a common abbreviation for the USA in Portuguese and
    # Spanish; confirm against the raw answers that "UAE" is really what was meant.
    r"Eua"i => "UAE",
    r"thailand"i => "Thailand",
    r"turkey"i => "Turkey",
    r"netherlands"i => "The Netherlands",
    r"venezuela"i => "Venezuela",
    r"asian"i => "Asian",
]

"""
    countryfromregion(region)

Return the country associated with the free-text `region` answer: the country of the
first entry of `countrypatterns` whose pattern occurs in `region`, or `"USA"` when no
pattern matches (so, for instance, "Georgia" is read as the US state).
"""
function countryfromregion(region)
    for (pattern, country) in countrypatterns
        occursin(pattern, region) && return country
    end
    return "USA"
end

data[!, :country] = String[countryfromregion(region) for region in data[!, :region]]
select!(data, Not(:region));

Computing the Payments

# Compute each subject's payment.
# Names used below that are not defined in this script (winner, lotteryrank, dc5,
# arrivaltimeranks, totalrpsranks, paintingrank, elias, comparativebeliefwinner,
# beliefwinner, low_reward, high_reward, participation_fee) are presumably provided by
# PaymentFunctions.jl or setup.jl -- TODO confirm.
include("PaymentFunctions.jl")
# Bare reference to `create_groups`: nothing is called on this line.
create_groups
# Draw, independently for every subject, which of the three predictions is paid.
# NOTE(review): no seed is set in this block, so the draw is reproducible only if the
# random number generator is seeded elsewhere -- confirm.
data[!, :prediction_payed] = rand([:lottery, :criteria, :best_mechanism], size(data, 1))
data[!, :best_mechanism_winner] = missings(Bool, size(data, 1))
# Base payment: participation fee, plus `low_reward` when `urn_winner` is true.
data[!, :payment] = data[!, :urn_winner] .* low_reward .+ participation_fee[2021]
# Placeholder result columns for both mechanisms; they start as missing (ranks as 0)
# and are presumably filled in place by the helpers called in the loop below.
for name = ["lottery", "criteria"]
    data[!, Symbol("$(name)_winner")] = missings(Bool, size(data, 1))
    data[!, Symbol("$(name)_ranks")] = zeros(Int, size(data, 1))
    data[!, Symbol("$(name)_score")] = missings(Int, size(data, 1))
    data[!, Symbol("$(name)_belief_winner")] = missings(Bool, size(data, 1))
end
# Now working only on the split data.
# One group per (criteria, lottery control, RPS winner/loser) combination.
sepdata = groupby(data, [:criteria, :lottery_control, :rps_winner]);
for key = keys(sepdata)
    println("Criteria: ", key[:criteria]," Winner/loser: ", key[:rps_winner], ", Criteria Control: ", sepdata[key][1, :criteria_control], ", Lottery Control: ", key[:lottery_control])
    # Lottery outcome for the group, using the `dc5` value.
    winner(sepdata[key], x -> lotteryrank(x, dc5), "lottery")
    # The criteria is the rewarding mechanism only if strictly more than half of the
    # group has `mechanism == true`; an exact tie goes to the lottery.
    criteria_chosen = (mean(sepdata[key][!, :mechanism]) > 0.5)
    if criteria_chosen
        chosenmechanism = key[:criteria]
    else
        chosenmechanism = "DC-5 Lottery"
    end
    println("Mechanism chosen to attribute the reward: ", chosenmechanism)
    # Criteria outcome for the group: the ranking function depends on the criteria.
    if key[:criteria] == "Arrival Time"
        winner(sepdata[key], arrivaltimeranks, "criteria")
    elseif key[:criteria] == "Rock, Paper, Scissors"
        winner(sepdata[key], totalrpsranks, "criteria", key[:rps_winner])
    elseif key[:criteria] == "Guessing the Paintings"
        winner(sepdata[key], x -> paintingrank(x, elias), "criteria")   
    end
    
    # Outcome of the three predictions (presumably written into the *_belief_winner
    # and best_mechanism_winner columns -- TODO confirm in PaymentFunctions.jl).
    comparativebeliefwinner(sepdata[key])
    beliefwinner(sepdata[key], "criteria")
    beliefwinner(sepdata[key], "lottery")
    for row = eachrow(sepdata[key])
        # Same value for every row of the group: it only depends on `criteria_chosen`.
        if criteria_chosen
            reward_mechanism = "criteria"
        else
            reward_mechanism = "lottery"
        end
        # Add `high_reward[2021]` if the subject won under the rewarding mechanism, and
        # `low_reward` if the one prediction drawn for payment has its winner flag set.
        row[:payment] += high_reward[2021] * row[Symbol("$(reward_mechanism)_winner")] +
            low_reward * (row[:criteria_belief_winner] * (row[:prediction_payed] .== :criteria) + 
            (row[:prediction_payed] .== :best_mechanism) * row[:best_mechanism_winner] + (row[:prediction_payed] .== :lottery) * row[:lottery_belief_winner])         
      end
end
# Payments are rounded to two decimal places.
data[!, :payment] = round.(data[!, :payment], digits = 2);
Criteria: Guessing the Paintings Winner/loser: false, Criteria Control: true, Lottery Control: false
Remaining winners to attribute: 4
Remaining subjects whose winning status has not been characterized: 5
Entering tie-breaking in for mechanism lottery
There are more winner than normally should be the case. It may be a normal behavior.
Mechanism chosen to attribute the reward: Guessing the Paintings
Remaining winners to attribute: 0
Remaining subjects whose winning status has not been characterized: 0
Criteria: Rock, Paper, Scissors Winner/loser: true, Criteria Control: true, Lottery Control: false
Remaining winners to attribute: 2
Remaining subjects whose winning status has not been characterized: 4
Entering tie-breaking in for mechanism lottery
Mechanism chosen to attribute the reward: Rock, Paper, Scissors
Remaining winners to attribute: 1
Remaining subjects whose winning status has not been characterized: 1
Criteria: Rock, Paper, Scissors Winner/loser: false, Criteria Control: true, Lottery Control: true
Remaining winners to attribute: 0
Remaining subjects whose winning status has not been characterized: 0
Mechanism chosen to attribute the reward: Rock, Paper, Scissors
Remaining winners to attribute: 1
Remaining subjects whose winning status has not been characterized: 2
Entering tie-breaking in for mechanism criteria
Criteria: Arrival Time Winner/loser: false, Criteria Control: false, Lottery Control: false
Remaining winners to attribute: 4
Remaining subjects whose winning status has not been characterized: 5
Entering tie-breaking in for mechanism lottery
Mechanism chosen to attribute the reward: Arrival Time
Remaining winners to attribute: 0
Remaining subjects whose winning status has not been characterized: 0
Criteria: Rock, Paper, Scissors Winner/loser: true, Criteria Control: true, Lottery Control: true
Remaining winners to attribute: 9
Remaining subjects whose winning status has not been characterized: 13
Entering tie-breaking in for mechanism lottery
Mechanism chosen to attribute the reward: Rock, Paper, Scissors
Remaining winners to attribute: 1
Remaining subjects whose winning status has not been characterized: 2
Entering tie-breaking in for mechanism criteria
Criteria: Rock, Paper, Scissors Winner/loser: false, Criteria Control: true, Lottery Control: false
Remaining winners to attribute: 1
Remaining subjects whose winning status has not been characterized: 7
Entering tie-breaking in for mechanism lottery
Mechanism chosen to attribute the reward: Rock, Paper, Scissors
Remaining winners to attribute: 1
Remaining subjects whose winning status has not been characterized: 1
Criteria: Arrival Time Winner/loser: false, Criteria Control: false, Lottery Control: true
Remaining winners to attribute: 4
Remaining subjects whose winning status has not been characterized: 7
Entering tie-breaking in for mechanism lottery
Mechanism chosen to attribute the reward: DC-5 Lottery
Remaining winners to attribute: 1
Remaining subjects whose winning status has not been characterized: 1
Criteria: Guessing the Paintings Winner/loser: false, Criteria Control: true, Lottery Control: true
Remaining winners to attribute: 10
Remaining subjects whose winning status has not been characterized: 12
Entering tie-breaking in for mechanism lottery
Mechanism chosen to attribute the reward: Guessing the Paintings
Remaining winners to attribute: 1
Remaining subjects whose winning status has not been characterized: 4
Entering tie-breaking in for mechanism criteria
# Distinct treatment labels; the ranks are rescaled within each of them further down
# so that ranks from groups of different sizes can be related to each other.
treatments = unique(data.treatment)
8-element Vector{String}:
 "Paintings, Control, Lottery, No Control"
 "RPS Winner, Control, Lottery, No Control"
 "RPS Loser, Control, Lottery, Control"
 "Time, No Control, Lottery, No Control"
 "RPS Winner, Control, Lottery, Control"
 "RPS Loser, Control, Lottery, No Control"
 "Time, No Control, Lottery, Control"
 "Paintings, Control, Lottery, Control"
# Ranks become floating-point numbers so that they can hold fractions.
for rankcol in (:criteria_ranks, :lottery_ranks)
    data[!, rankcol] = Float64.(data[!, rankcol])
end
# Within each treatment, divide both ranks by the number of subjects in that treatment.
for label in treatments
    members = data.treatment .== label
    groupsize = sum(members)
    for rankcol in (:criteria_ranks, :lottery_ranks)
        data[members, rankcol] .= data[members, rankcol] ./ groupsize
    end
end

Comments Encoding

We have encoded the comments according to three dummy variables: probability, preference, and error.

  • probability means that we read in the comment that higher probabilities of winning are what drives the choices of a mechanism over another (even if the belief/understanding and subsequent choice may not reflect that).
  • preference means that we read in the comment an intrinsic preference for one or the other mechanism.
  • error means that the comments made by the participants showed some misunderstanding of the experiment.
# Read our hand-made encoding of the comments: one row per participant with the three
# boolean dummies.
comments = CSV.read(
    joinpath("..", "Data", "OriginalData", "Experiment1_AComments.csv"),
    DataFrame;
    select = [:participant_code, :probability, :preference, :error],
    types = Dict(:probability => Bool, :preference => Bool, :error => Bool),
);
# Participants of the cleaned data that have no encoded comment.
missing_part = setdiff(data.participant_code, comments.participant_code)
InlineStrings.String15[]
# Merge our comment encoding into the data; only participants found in both are kept.
data = innerjoin(data, comments; on = :participant_code);
# Align the criteria names with those of experiment 2.
data[data.criteria .== "Arrival Time", :criteria] .= "Time";
# Every subject of this file belongs to session 1.
data[!, :session] .= 1;
# Save the cleaned data as a comma-separated file.
CSV.write(joinpath("..", "Data", "Input", "Experiment1_ACleanedData.csv"), data; delim = ',')
"../Data/Input/Experiment1_ACleanedData.csv"