package com.microsoft.office.react.officefeed.model;

import com.facebook.react.views.textinput.ReactEditTextInputConnectionWrapper;
import com.google.gson.annotations.SerializedName;
import io.swagger.annotations.ApiModel;
import io.swagger.annotations.ApiModelProperty;
import java.util.Objects;

/**
 * Settings for the gradient bandit algorithm itself.
 *
 * <p>Plain mutable data holder: every property has a getter, a {@code void} setter and a
 * fluent setter that returns {@code this} for chaining. Fields carry Gson
 * {@code @SerializedName} annotations whose keys are exposed as the
 * {@code SERIALIZED_NAME_*} constants.
 *
 * <p>No validation is performed here; properties flagged {@code required} in their
 * {@code @ApiModelProperty} annotation are still {@code null} until explicitly assigned.
 */
@ApiModel(description = "Defines settings related to the gradient bandit algorithm itself.")
public class OASGradientBanditEngineBanditSettings {
    public static final String SERIALIZED_NAME_DYNAMIC_REWARD_BASELINE = "dynamicRewardBaseline";
    public static final String SERIALIZED_NAME_DYNAMIC_REWARD_BASELINE_DISCOUNT_FACTOR = "dynamicRewardBaselineDiscountFactor";
    public static final String SERIALIZED_NAME_PREFERENCE_LEARNING_RATE = "preferenceLearningRate";
    public static final String SERIALIZED_NAME_SOFT_MAX_TEMPERATURE = "softMaxTemperature";

    // Line separator used by toString()/toIndentedString(). Kept local so this model does
    // not depend on an unrelated UI class merely to obtain a newline character.
    private static final String NEWLINE = "\n";

    // Continuation-line replacement: a newline followed by one indentation level.
    private static final String INDENTED_NEWLINE = "\n    ";

    /** Defaults to {@code false}; may be overwritten (including with {@code null}) by the setters. */
    @SerializedName(SERIALIZED_NAME_DYNAMIC_REWARD_BASELINE)
    private Boolean dynamicRewardBaseline = Boolean.FALSE;

    @SerializedName(SERIALIZED_NAME_DYNAMIC_REWARD_BASELINE_DISCOUNT_FACTOR)
    private Double dynamicRewardBaselineDiscountFactor;

    @SerializedName(SERIALIZED_NAME_PREFERENCE_LEARNING_RATE)
    private Double preferenceLearningRate;

    @SerializedName(SERIALIZED_NAME_SOFT_MAX_TEMPERATURE)
    private Double softMaxTemperature;

    // ---------------------------------------------------------------- dynamicRewardBaseline

    /**
     * Fluent setter for the dynamic-reward-baseline flag.
     *
     * @param dynamicRewardBaseline new value, stored as-is (may be {@code null})
     * @return this instance, for chaining
     */
    public OASGradientBanditEngineBanditSettings dynamicRewardBaseline(Boolean dynamicRewardBaseline) {
        this.dynamicRewardBaseline = dynamicRewardBaseline;
        return this;
    }

    /**
     * @return the dynamic-reward-baseline flag; {@code Boolean.FALSE} unless changed
     */
    @ApiModelProperty("If true, the reward baseline used in the bandits update algorithm is tracked dynamically. Assumes that the bandit is presented with all feedback, also zero reward items. If false, a static reward baseline of 0.0 is assumed.")
    public Boolean getDynamicRewardBaseline() {
        return this.dynamicRewardBaseline;
    }

    /**
     * @param dynamicRewardBaseline new value, stored as-is (may be {@code null})
     */
    public void setDynamicRewardBaseline(Boolean dynamicRewardBaseline) {
        this.dynamicRewardBaseline = dynamicRewardBaseline;
    }

    // -------------------------------------------------- dynamicRewardBaselineDiscountFactor

    /**
     * Fluent setter for the reward-baseline discount factor.
     *
     * @param discountFactor new value, stored as-is (may be {@code null})
     * @return this instance, for chaining
     */
    public OASGradientBanditEngineBanditSettings dynamicRewardBaselineDiscountFactor(Double discountFactor) {
        this.dynamicRewardBaselineDiscountFactor = discountFactor;
        return this;
    }

    /**
     * @return the reward-baseline discount factor, or {@code null} if never assigned
     */
    @ApiModelProperty(required = true, value = "The discount factor to use if tracking the reward baseline dynamically. The tracked baseline is the exponential recency weighted average reward. A value of 1.0 means that all the weight goes on the very last reward. A value smaller than 1.0 acts as a discounting factor, indicating how slow we decay older reward values when computing the average. See Section 2.5 in Sutton and Bartos book.")
    public Double getDynamicRewardBaselineDiscountFactor() {
        return this.dynamicRewardBaselineDiscountFactor;
    }

    /**
     * @param discountFactor new value, stored as-is (may be {@code null})
     */
    public void setDynamicRewardBaselineDiscountFactor(Double discountFactor) {
        this.dynamicRewardBaselineDiscountFactor = discountFactor;
    }

    // --------------------------------------------------------------- preferenceLearningRate

    /**
     * Fluent setter for the preference learning rate.
     *
     * @param learningRate new value, stored as-is (may be {@code null})
     * @return this instance, for chaining
     */
    public OASGradientBanditEngineBanditSettings preferenceLearningRate(Double learningRate) {
        this.preferenceLearningRate = learningRate;
        return this;
    }

    /**
     * @return the preference learning rate, or {@code null} if never assigned
     */
    @ApiModelProperty(required = true, value = "The step size to use when updating the underlying preferences. Basically the learning rate in gradient ascent. See Section 2.8 in Sutton and Bartos book")
    public Double getPreferenceLearningRate() {
        return this.preferenceLearningRate;
    }

    /**
     * @param learningRate new value, stored as-is (may be {@code null})
     */
    public void setPreferenceLearningRate(Double learningRate) {
        this.preferenceLearningRate = learningRate;
    }

    // ------------------------------------------------------------------- softMaxTemperature

    /**
     * Fluent setter for the soft-max temperature.
     *
     * @param temperature new value, stored as-is (may be {@code null})
     * @return this instance, for chaining
     */
    public OASGradientBanditEngineBanditSettings softMaxTemperature(Double temperature) {
        this.softMaxTemperature = temperature;
        return this;
    }

    /**
     * @return the soft-max temperature, or {@code null} if never assigned
     */
    @ApiModelProperty(required = true, value = "The temperature, in allusion to statistical mechanics, used when computing policy values from underlying preferences using soft max normalization. For high temperatures (i.e., much higher than 1.0) all actions have nearly the same probability and the lower the temperature the more expected rewards affect the probability. For a low temperature (i.e., close to 0.0 the probability of the action with the highest expected rewards tends to 1.")
    public Double getSoftMaxTemperature() {
        return this.softMaxTemperature;
    }

    /**
     * @param temperature new value, stored as-is (may be {@code null})
     */
    public void setSoftMaxTemperature(Double temperature) {
        this.softMaxTemperature = temperature;
    }

    // ------------------------------------------------------------ equals / hashCode / toString

    /**
     * Two instances are equal when they are of exactly the same class and all four
     * properties are pairwise equal (null-safe).
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        // Exact class match (not instanceof), so a subclass instance never equals a base instance.
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        OASGradientBanditEngineBanditSettings other = (OASGradientBanditEngineBanditSettings) obj;
        return Objects.equals(this.dynamicRewardBaseline, other.dynamicRewardBaseline)
                && Objects.equals(this.dynamicRewardBaselineDiscountFactor, other.dynamicRewardBaselineDiscountFactor)
                && Objects.equals(this.preferenceLearningRate, other.preferenceLearningRate)
                && Objects.equals(this.softMaxTemperature, other.softMaxTemperature);
    }

    /** Hash over the same four properties compared by {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        return Objects.hash(
                this.dynamicRewardBaseline,
                this.dynamicRewardBaselineDiscountFactor,
                this.preferenceLearningRate,
                this.softMaxTemperature);
    }

    /**
     * Renders the object as a multi-line block: a header line, one four-space-indented
     * {@code name: value} line per property, and a closing brace.
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append("class OASGradientBanditEngineBanditSettings {").append(NEWLINE);
        sb.append("    dynamicRewardBaseline: ")
                .append(toIndentedString(this.dynamicRewardBaseline)).append(NEWLINE);
        sb.append("    dynamicRewardBaselineDiscountFactor: ")
                .append(toIndentedString(this.dynamicRewardBaselineDiscountFactor)).append(NEWLINE);
        sb.append("    preferenceLearningRate: ")
                .append(toIndentedString(this.preferenceLearningRate)).append(NEWLINE);
        sb.append("    softMaxTemperature: ")
                .append(toIndentedString(this.softMaxTemperature)).append(NEWLINE);
        sb.append("}");
        return sb.toString();
    }

    /**
     * Converts a value to its string form, indenting every line after the first by four
     * spaces so multi-line values stay aligned inside {@link #toString()}.
     *
     * @param obj value to render; may be {@code null}
     * @return the literal text {@code "null"} for a null value, otherwise the indented string
     */
    private String toIndentedString(Object obj) {
        if (obj == null) {
            return "null";
        }
        return obj.toString().replace(NEWLINE, INDENTED_NEWLINE);
    }
}
