Natural Language Inference Testing Config

Below is the default configuration for all Natural Language Inference tests. A copy of this configuration can also be found in your rime_trial bundle, at nlp_examples/nli/default_test_config.json.

{
  "categories": [],
  "run_default": null,
  "custom_tests": null,
  "numeric_outlier": {
    "exclude_columns": [],
    "run": true,
    "performance_change_config": {
      "severity_thresholds": null,
      "min_num_samples": 1
    },
    "min_normal_prop": 0.99,
    "baseline_quantile": 0.1,
    "perturb_multiplier": 1.0
  },
  "unseen_categorical": {
    "exclude_columns": [],
    "run": true,
    "performance_change_config": {
      "severity_thresholds": null,
      "min_num_samples": 1
    }
  },
  "unseen_domain": {
    "exclude_columns": [],
    "run": true,
    "performance_change_config": {
      "severity_thresholds": null,
      "min_num_samples": 1
    }
  },
  "unseen_email": {
    "exclude_columns": [],
    "run": true,
    "performance_change_config": {
      "severity_thresholds": null,
      "min_num_samples": 1
    }
  },
  "unseen_url": {
    "exclude_columns": [],
    "run": true,
    "performance_change_config": {
      "severity_thresholds": null,
      "min_num_samples": 1
    }
  },
  "rare_categories": {
    "exclude_columns": [],
    "run": true,
    "performance_change_config": {
      "severity_thresholds": null,
      "min_num_samples": 1
    },
    "include_columns": [],
    "min_num_occurrences": 0,
    "min_pct_occurrences": 0,
    "min_ratio_rel_uniform": 0.005
  },
  "out_of_range": {
    "exclude_columns": [],
    "run": false,
    "performance_change_config": {
      "severity_thresholds": null,
      "min_num_samples": 1
    },
    "std_factor": 3
  },
  "req_characters": {
    "column_specific_params": {},
    "exclude_columns": [],
    "run": true,
    "performance_change_config": {
      "severity_thresholds": null,
      "min_num_samples": 1
    }
  },
  "inconsistencies": {
    "exclude_columns": [],
    "run": true,
    "performance_change_config": {
      "severity_thresholds": null,
      "min_num_samples": 1
    },
    "freq_ratio_threshold": 0.02,
    "min_correlation": 0.1,
    "max_pairwise_tests": 200,
    "max_unique_pairs_for_firewall": 15
  },
  "capitalization": {
    "exclude_columns": [],
    "run": true,
    "performance_change_config": {
      "severity_thresholds": null,
      "min_num_samples": 1
    }
  },
  "empty_string": {
    "exclude_columns": [],
    "run": true,
    "performance_change_config": {
      "severity_thresholds": null,
      "min_num_samples": 1
    }
  },
  "embedding_anomalies": {
    "exclude_columns": [],
    "run": true,
    "performance_change_config": {
      "severity_thresholds": null,
      "min_num_samples": 1
    },
    "distance_quantile": 0.995
  },
  "feat_subset_auc": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_accuracy": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_f1": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_macro_f1": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_precision": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_macro_precision": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_fpr": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_recall": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_macro_recall": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_pred_variance_pos": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_pred_variance_neg": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_rmse": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_mae": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_mape": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_rank_correlation": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_ndcg": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_mrr": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_multiclass_acc": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_multiclass_auc": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_drift_auc": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_drift_accuracy": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_drift_f1": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_drift_macro_f1": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_drift_precision": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_drift_macro_precision": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_drift_fpr": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_drift_recall": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_drift_macro_recall": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_drift_pred_variance_pos": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_drift_pred_variance_neg": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_drift_pred_variance_all": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_drift_rmse": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_drift_mae": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_drift_mape": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_drift_rank_correlation": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_drift_ndcg": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_drift_mrr": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_drift_multiclass_acc": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "feat_subset_drift_multiclass_auc": {
    "exclude_columns": [],
    "run": true,
    "min_sample_size": 20,
    "performance_change_thresholds": null
  },
  "correlation_feat_drift": {
    "exclude_columns": [],
    "run": true,
    "correlation_drift_thresholds": [
      0.3,
      0.5,
      0.7
    ],
    "p_value_threshold": 0.05,
    "min_correlation": 0.1,
    "max_pairwise_tests": 200
  },
  "correlation_label_drift": {
    "exclude_columns": [],
    "run": true,
    "correlation_drift_thresholds": [
      0.3,
      0.5,
      0.7
    ],
    "p_value_threshold": 0.05
  },
  "mutual_information_feat_drift": {
    "exclude_columns": [],
    "run": true,
    "min_mutual_information": 0.1,
    "mutual_information_thresholds": [
      0.1,
      0.2,
      0.3
    ],
    "max_pairwise_tests": 200,
    "min_sample_size": 100
  },
  "mutual_information_label_drift": {
    "exclude_columns": [],
    "run": true,
    "mutual_information_thresholds": [
      0.1,
      0.2,
      0.3
    ]
  },
  "categorical_label_drift": {
    "run": true,
    "drift_statistic": "Population Stability Index",
    "params": {
      "run": true,
      "distance_thresholds": [
        0.2,
        0.4,
        0.6
      ]
    }
  },
  "multiclass_pred_label_drift": {
    "run": true,
    "drift_statistic": "Population Stability Index",
    "params": {
      "run": true,
      "distance_thresholds": [
        0.2,
        0.4,
        0.6
      ]
    }
  },
  "regression_label_drift": {
    "run": true,
    "p_value_threshold": 0.05,
    "ks_stat_thresholds": [
      0.1,
      0.33,
      0.67
    ]
  },
  "categorical_drift": {
    "exclude_columns": [],
    "run": true,
    "drift_scaling_factor": 0.005,
    "performance_change_thresholds": null,
    "drift_statistic": "Population Stability Index",
    "params": {
      "run": true,
      "drift_scaling_factor": 0.005,
      "performance_change_thresholds": null,
      "min_sample_size": 100,
      "max_sample_size": null,
      "distance_threshold": 0.2
    },
    "ignore_nans": true
  },
  "continuous_drift": {
    "exclude_columns": [],
    "run": true,
    "drift_scaling_factor": 0.005,
    "performance_change_thresholds": null,
    "drift_statistic": "Population Stability Index",
    "params": {
      "run": true,
      "drift_scaling_factor": 0.005,
      "performance_change_thresholds": null,
      "min_sample_size": 100,
      "min_num_quantiles": 1000,
      "distance_threshold": 0.2,
      "num_bins": 100
    },
    "ignore_nans": true
  },
  "prediction_drift": {
    "run": true,
    "drift_statistic": "Population Stability Index",
    "params": {
      "run": true,
      "min_sample_size": 100,
      "min_num_quantiles": 1000,
      "psi_thresholds": [
        0.2,
        0.4,
        0.6
      ],
      "num_bins": 100
    }
  },
  "embedding_drift": {
    "exclude_columns": [],
    "run": true,
    "drift_scaling_factor": 0.005,
    "performance_change_thresholds": null,
    "drift_statistic": "euclidean_distance",
    "params": {
      "run": true,
      "drift_scaling_factor": 0.005,
      "performance_change_thresholds": null,
      "min_sample_size": 100,
      "distance_threshold": 0.1,
      "normalize": true
    }
  },
  "overall_metrics": {
    "run": true,
    "metrics_specific_thresholds": {}
  },
  "avg_confidence": {
    "run": true,
    "severity_thresholds": [
      0.03,
      0.08,
      0.13
    ]
  },
  "atc": {
    "run": true,
    "severity_thresholds": [
      0.03,
      0.08,
      0.13
    ]
  },
  "calibration_comparison": {
    "run": true,
    "severity_level_thresholds": [
      0.02,
      0.06,
      0.1
    ]
  },
  "label_imbalance": {
    "run": true,
    "severity_thresholds": [
      0.6,
      0.75,
      0.9
    ],
    "normalize": true
  },
  "global_sample_size": null,
  "metadata_tests": null,
  "global_target_text_keys": null,
  "unseen_unigram_abnormal_input": {
    "run": true,
    "severity_thresholds": [
      0.0,
      0.0002,
      0.0005
    ],
    "performance_impact_config": {
      "ignore_observed_performance": false,
      "min_num_samples": 1,
      "severity_thresholds": [
        0.01,
        0.05,
        0.1
      ]
    },
    "p_value_threshold": 0.0005
  },
  "empty_string_abnormal_input": {
    "run": true,
    "severity_thresholds": [
      0.0,
      0.0002,
      0.0005
    ],
    "performance_impact_config": {
      "ignore_observed_performance": false,
      "min_num_samples": 1,
      "severity_thresholds": [
        0.01,
        0.05,
        0.1
      ]
    }
  },
  "char_dist_drift": {
    "run": true,
    "drift_metrics": [
      {
        "distance_metric": "Population Stability Index",
        "severity_threshold": [
          0.1,
          0.2,
          0.4
        ]
      }
    ],
    "min_occurrences": 0,
    "model_impact_config": {
      "ignore_observed_performance": false,
      "min_num_samples": 1,
      "severity_thresholds": [
        0.01,
        0.05,
        0.1
      ]
    }
  },
  "unigrams_drift": {
    "run": true,
    "drift_metrics": [
      {
        "distance_metric": "Population Stability Index",
        "severity_threshold": [
          0.1,
          0.2,
          0.4
        ]
      }
    ],
    "min_occurrences": 5,
    "model_impact_config": {
      "ignore_observed_performance": false,
      "min_num_samples": 1,
      "severity_thresholds": [
        0.01,
        0.05,
        0.1
      ]
    }
  },
  "bigrams_drift": {
    "run": true,
    "drift_metrics": [
      {
        "distance_metric": "Population Stability Index",
        "severity_threshold": [
          0.1,
          0.2,
          0.4
        ]
      }
    ],
    "min_occurrences": 5,
    "model_impact_config": {
      "ignore_observed_performance": false,
      "min_num_samples": 1,
      "severity_thresholds": [
        0.01,
        0.05,
        0.1
      ]
    }
  },
  "invisible_chars_attack": {
    "run": true,
    "sample_size": 5,
    "target_text_keys": null,
    "severity_thresholds": [
      0.1,
      0.25,
      0.5
    ],
    "attack_params": {
      "target_score": 0.0,
      "max_queries": 200,
      "population_size": 15,
      "max_unsuccessful_iters": 25,
      "max_consecutive_unsuccessful": 3
    },
    "mutation_params": {
      "aug_char_p": 0.01
    },
    "max_str_len": 2000
  },
  "deletion_chars_attack": {
    "run": true,
    "sample_size": 5,
    "target_text_keys": null,
    "severity_thresholds": [
      0.1,
      0.25,
      0.5
    ],
    "attack_params": {
      "target_score": 0.0,
      "max_queries": 200,
      "population_size": 15,
      "max_unsuccessful_iters": 25,
      "max_consecutive_unsuccessful": 3
    },
    "mutation_params": {
      "aug_char_p": 0.01
    },
    "max_str_len": 2000
  },
  "intentional_homoglyph_attack": {
    "run": true,
    "sample_size": 5,
    "target_text_keys": null,
    "severity_thresholds": [
      0.1,
      0.25,
      0.5
    ],
    "attack_params": {
      "target_score": 0.0,
      "max_queries": 200,
      "population_size": 15,
      "max_unsuccessful_iters": 25,
      "max_consecutive_unsuccessful": 3
    },
    "mutation_params": {
      "aug_char_p": 0.01
    },
    "max_str_len": 2000
  },
  "confusable_homoglyph_attack": {
    "run": true,
    "sample_size": 5,
    "target_text_keys": null,
    "severity_thresholds": [
      0.1,
      0.25,
      0.5
    ],
    "attack_params": {
      "target_score": 0.0,
      "max_queries": 200,
      "population_size": 15,
      "max_unsuccessful_iters": 25,
      "max_consecutive_unsuccessful": 3
    },
    "mutation_params": {
      "aug_char_p": 0.01
    },
    "max_str_len": 2000
  },
  "universal_triggers_attack": {
    "run": true,
    "sample_size": 3,
    "target_text_keys": null,
    "batch_size": 10,
    "severity_thresholds": [
      0.1,
      0.2,
      0.3
    ],
    "attack_params": {
      "target_score": 0.0,
      "max_queries": 500,
      "population_size": 2,
      "max_unsuccessful_iters": 200,
      "max_consecutive_unsuccessful": 10
    },
    "mutation_params": {
      "prefix_len": 10,
      "aug_word_p": 0.2
    }
  },
  "char_substitution_attack": {
    "run": true,
    "sample_size": 200,
    "target_text_keys": null,
    "severity_thresholds": [
      0.1,
      0.2,
      0.3
    ],
    "max_str_len": 2000,
    "attack_params": {
      "min_char": 2,
      "aug_char_max": 1,
      "aug_char_min": 1,
      "aug_char_p": 0.1,
      "aug_word_min": 1,
      "aug_word_max": null,
      "aug_word_p": 0.05
    }
  },
  "char_deletion_attack": {
    "run": true,
    "sample_size": 200,
    "target_text_keys": null,
    "severity_thresholds": [
      0.1,
      0.2,
      0.3
    ],
    "max_str_len": 2000,
    "attack_params": {
      "min_char": 2,
      "aug_char_max": 1,
      "aug_char_min": 1,
      "aug_char_p": 0.1,
      "aug_word_min": 1,
      "aug_word_max": null,
      "aug_word_p": 0.05
    }
  },
  "char_insertion_attack": {
    "run": true,
    "sample_size": 200,
    "target_text_keys": null,
    "severity_thresholds": [
      0.1,
      0.2,
      0.3
    ],
    "max_str_len": 2000,
    "attack_params": {
      "min_char": 2,
      "aug_char_max": 1,
      "aug_char_min": 1,
      "aug_char_p": 0.1,
      "aug_word_min": 1,
      "aug_word_max": null,
      "aug_word_p": 0.05
    }
  },
  "char_swap_attack": {
    "run": true,
    "sample_size": 200,
    "target_text_keys": null,
    "severity_thresholds": [
      0.1,
      0.2,
      0.3
    ],
    "max_str_len": 2000,
    "attack_params": {
      "min_char": 2,
      "aug_char_max": 1,
      "aug_char_min": 1,
      "aug_char_p": 0.1,
      "aug_word_min": 1,
      "aug_word_max": null,
      "aug_word_p": 0.05
    }
  },
  "keyboard_aug_attack": {
    "run": true,
    "sample_size": 200,
    "target_text_keys": null,
    "severity_thresholds": [
      0.1,
      0.2,
      0.3
    ],
    "max_str_len": 2000,
    "attack_params": {
      "min_char": 2,
      "aug_char_max": 1,
      "aug_char_min": 1,
      "aug_char_p": 0.1,
      "aug_word_min": 1,
      "aug_word_max": null,
      "aug_word_p": 0.05
    }
  },
  "common_misspelling_attack": {
    "run": true,
    "sample_size": 200,
    "target_text_keys": null,
    "severity_thresholds": [
      0.1,
      0.2,
      0.3
    ],
    "max_str_len": 2000,
    "attack_params": null
  },
  "ocr_error_attack": {
    "run": true,
    "sample_size": 200,
    "target_text_keys": null,
    "severity_thresholds": [
      0.1,
      0.2,
      0.3
    ],
    "max_str_len": 2000,
    "attack_params": null
  },
  "synonym_swap_attack": {
    "run": true,
    "sample_size": 200,
    "target_text_keys": null,
    "severity_thresholds": [
      0.1,
      0.2,
      0.3
    ],
    "max_str_len": 2000,
    "attack_params": {
      "aug_min": 1,
      "aug_max": null,
      "aug_p": 0.05
    }
  },
  "lm_word_substitution_attack": {
    "run": true,
    "sample_size": 200,
    "target_text_keys": null,
    "severity_thresholds": [
      0.1,
      0.2,
      0.3
    ],
    "max_str_len": 2000,
    "attack_params": {
      "aug_min": 1,
      "aug_max": null,
      "aug_p": 0.05,
      "model_path": "distilbert-base-cased"
    }
  },
  "lm_word_insertion_attack": {
    "run": true,
    "sample_size": 200,
    "target_text_keys": null,
    "severity_thresholds": [
      0.1,
      0.2,
      0.3
    ],
    "max_str_len": 2000,
    "attack_params": {
      "aug_min": 1,
      "aug_max": null,
      "aug_p": 0.05,
      "model_path": "distilbert-base-cased"
    }
  },
  "upper_case_text": {
    "run": true,
    "sample_size": 200,
    "target_text_keys": null,
    "severity_thresholds": [
      0.1,
      0.2,
      0.3
    ],
    "max_str_len": 2000
  },
  "lower_case_text": {
    "run": true,
    "sample_size": 200,
    "target_text_keys": null,
    "severity_thresholds": [
      0.1,
      0.2,
      0.3
    ],
    "max_str_len": 2000
  },
  "remove_special_chars": {
    "run": true,
    "sample_size": 200,
    "target_text_keys": null,
    "severity_thresholds": [
      0.1,
      0.2,
      0.3
    ],
    "max_str_len": 2000
  },
  "swap_masc_to_fem": {
    "run": true,
    "sample_size": 200,
    "target_text_keys": null,
    "severity_thresholds": [
      0.1,
      0.2,
      0.3
    ],
    "max_str_len": 2000
  },
  "swap_fem_to_masc": {
    "run": true,
    "sample_size": 200,
    "target_text_keys": null,
    "severity_thresholds": [
      0.1,
      0.2,
      0.3
    ],
    "max_str_len": 2000
  },
  "swap_fem_name_to_masc_name": {
    "run": true,
    "sample_size": 200,
    "target_text_keys": null,
    "severity_thresholds": [
      0.1,
      0.2,
      0.3
    ],
    "max_str_len": 2000,
    "source": null,
    "target": null,
    "case_invariant": true
  },
  "swap_masc_name_to_fem_name": {
    "run": true,
    "sample_size": 200,
    "target_text_keys": null,
    "severity_thresholds": [
      0.1,
      0.2,
      0.3
    ],
    "max_str_len": 2000,
    "source": null,
    "target": null,
    "case_invariant": true
  },
  "ascii": {
    "run": true,
    "sample_size": 200,
    "target_text_keys": null,
    "severity_thresholds": [
      0.1,
      0.2,
      0.3
    ],
    "max_str_len": 2000
  }
}