### Text Classification Testing Config

Below is the default configuration for all Text Classification tests. A copy of this configuration is also included in your `rime_trial` bundle, at `nlp_examples/classification/default_test_config.json`.

```json
{ "categories": [], "run_default": null, "custom_tests": null, "numeric_outlier": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null }, "min_normal_prop": 0.99, "baseline_quantile": 0.1, "perturb_multiplier": 1.0 }, "unseen_categorical": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null } }, "unseen_domain": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null } }, "unseen_email": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null } }, "unseen_url": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null } }, "rare_categories": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null }, "include_columns": [], "min_num_occurrences": 0, "min_pct_occurrences": 0, "min_ratio_rel_uniform": 0.005 }, "out_of_range": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null }, "std_factor": 3 }, "int_feature_type": { "exclude_columns": [], "run": true, "performance_change_config": {
"num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null } }, "float_feature_type": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null } }, "str_feature_type": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null } }, "bool_feature_type": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null } }, "url_feature_type": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null } }, "domain_feature_type": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null } }, "email_feature_type": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null } }, "req_characters": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null }, "column_specific_params": {} }, "inconsistencies": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null }, "freq_ratio_threshold": 0.02, "min_correlation": 0.1, "max_pairwise_tests": 200, "max_unique_pairs_for_firewall": 15 }, "null_check": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100,
"min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null } }, "capitalization": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null } }, "empty_string": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null } }, "feat_subset_auc": { "exclude_columns": [], "run": true, "min_sample_size": 20, "performance_change_thresholds": null }, "feat_subset_accuracy": { "exclude_columns": [], "run": true, "min_sample_size": 20, "performance_change_thresholds": null }, "feat_subset_f1": { "exclude_columns": [], "run": true, "min_sample_size": 20, "performance_change_thresholds": null }, "feat_subset_macro_f1": { "exclude_columns": [], "run": true, "min_sample_size": 20, "performance_change_thresholds": null }, "feat_subset_precision": { "exclude_columns": [], "run": true, "min_sample_size": 20, "performance_change_thresholds": null }, "feat_subset_macro_precision": { "exclude_columns": [], "run": true, "min_sample_size": 20, "performance_change_thresholds": null }, "feat_subset_fpr": { "exclude_columns": [], "run": true, "min_sample_size": 20, "performance_change_thresholds": null }, "feat_subset_recall": { "exclude_columns": [], "run": true, "min_sample_size": 20, "performance_change_thresholds": null }, "feat_subset_macro_recall": { "exclude_columns": [], "run": true, "min_sample_size": 20, "performance_change_thresholds": null }, "feat_subset_pred_variance_pos": { "exclude_columns": [], "run": true, "min_sample_size": 20, "performance_change_thresholds": null }, "feat_subset_pred_variance_neg": { "exclude_columns": [], "run": true, "min_sample_size": 20, "performance_change_thresholds": null }, "feat_subset_pred_variance_all": { "exclude_columns": [], "run": true, "min_sample_size": 20,
"performance_change_thresholds": null }, "feat_subset_rmse": { "exclude_columns": [], "run": true, "min_sample_size": 20, "performance_change_thresholds": null }, "feat_subset_mae": { "exclude_columns": [], "run": true, "min_sample_size": 20, "performance_change_thresholds": null }, "feat_subset_rank_correlation": { "exclude_columns": [], "run": true, "min_sample_size": 20, "performance_change_thresholds": null }, "feat_subset_ndcg": { "exclude_columns": [], "run": true, "min_sample_size": 20, "performance_change_thresholds": null }, "feat_subset_mrr": { "exclude_columns": [], "run": true, "min_sample_size": 20, "performance_change_thresholds": null }, "correlation_drift": { "exclude_columns": [], "run": true, "min_correlation": 0.1, "correlation_thresholds": [ 0.1, 0.2, 0.3 ], "p_value_threshold": 0.05, "max_pairwise_tests": 200 }, "mutual_information_feat_drift": { "exclude_columns": [], "run": true, "min_mutual_information": 0.1, "mutual_information_thresholds": [ 0.1, 0.2, 0.3 ], "max_pairwise_tests": 200, "min_sample_size": 100 }, "mutual_information_label_drift": { "exclude_columns": [], "run": true, "min_mutual_information": 0.1, "mutual_information_thresholds": [ 0.1, 0.2, 0.3 ], "max_pairwise_tests": 200, "min_sample_size": 100 }, "categorical_label_drift": { "run": true, "drift_statistic": "Population Stability Index", "params": { "run": true, "num_values_for_graph": 5, "distance_thresholds": [ 0.2, 0.4, 0.6 ] } }, "multiclass_pred_label_drift": { "run": true, "drift_statistic": "Population Stability Index", "params": { "run": true, "num_values_for_graph": 5, "distance_thresholds": [ 0.2, 0.4, 0.6 ] } }, "regression_label_drift": { "run": true, "p_value_threshold": 0.05, "ks_stat_thresholds": [ 0.1, 0.33, 0.67 ] }, "categorical_drift": { "exclude_columns": [], "run": true, "drift_statistic": "Population Stability Index", "params": { "run": true, "drift_scaling_factor": 0.005, "performance_change_thresholds": null, "min_sample_size": 100,
"max_sample_size": null, "distance_threshold": 0.2 } }, "null_proportion": { "exclude_columns": [], "run": true, "drift_scaling_factor": 0.005, "performance_change_thresholds": null, "p_value_threshold": 0.05, "min_sample_size": 100 }, "row_null_proportion": { "exclude_columns": [], "run": true, "drift_statistic": "Population Stability Index", "params": { "exclude_columns": [], "run": true, "drift_scaling_factor": 0.005, "performance_change_thresholds": null, "psi_threshold": 0.2 } }, "continuous_drift": { "exclude_columns": [], "run": true, "drift_scaling_factor": 0.005, "performance_change_thresholds": null, "drift_statistic": "Population Stability Index", "params": { "run": true, "drift_scaling_factor": 0.005, "performance_change_thresholds": null, "min_sample_size": 100, "min_num_quantiles": 1000, "distance_threshold": 0.2, "num_bins": 100 } }, "overall_metrics": { "run": true, "metrics_specific_thresholds": {} }, "prediction_drift": { "run": true, "drift_statistic": "Population Stability Index", "params": { "run": true, "min_sample_size": 100, "min_num_quantiles": 1000, "psi_thresholds": [ 0.2, 0.4, 0.6 ], "num_bins": 100 } }, "calibration_comparison": { "run": true, "severity_level_thresholds": [ 0.02, 0.06, 0.1 ] }, "global_sample_size": null, "metadata_tests": null, "unseen_unigram_abnormal_input": { "run": true, "severity_thresholds": [ 0.0, 0.05, 0.1 ], "performance_impact_config": { "ignore_observed_performance": false, "min_num_samples": 10, "severity_thresholds": [ 0.01, 0.05, 0.1 ] } }, "empty_string_abnormal_input": { "run": true, "severity_thresholds": [ 0.0, 0.05, 0.1 ], "performance_impact_config": { "ignore_observed_performance": false, "min_num_samples": 10, "severity_thresholds": [ 0.01, 0.05, 0.1 ] } }, "char_dist_drift": { "run": true, "drift_metrics": [ { "distance_metric": "Population Stability Index", "severity_threshold": [ 0.1, 0.2, 0.4 ] } ], "min_occurrences": 0, "model_impact_config": { "ignore_observed_performance": false,
"min_num_samples": 10, "severity_thresholds": [ 0.01, 0.05, 0.1 ] } }, "unigrams_drift": { "run": true, "drift_metrics": [ { "distance_metric": "Population Stability Index", "severity_threshold": [ 0.1, 0.2, 0.4 ] } ], "min_occurrences": 5, "model_impact_config": { "ignore_observed_performance": false, "min_num_samples": 10, "severity_thresholds": [ 0.01, 0.05, 0.1 ] } }, "bigrams_drift": { "run": true, "drift_metrics": [ { "distance_metric": "Population Stability Index", "severity_threshold": [ 0.1, 0.2, 0.4 ] } ], "min_occurrences": 5, "model_impact_config": { "ignore_observed_performance": false, "min_num_samples": 10, "severity_thresholds": [ 0.01, 0.05, 0.1 ] } }, "invisible_chars_attack": { "run": false, "sample_size": 200, "severity_thresholds": [ 0.1, 0.2, 0.3 ], "attack_params": { "target_score": 0.0, "max_queries": 500, "population_size": 15, "max_unsuccessful_iters": 15 }, "mutation_params": { "aug_char_p": 0.01 }, "max_str_len": 2000 }, "deletion_chars_attack": { "run": false, "sample_size": 200, "severity_thresholds": [ 0.1, 0.2, 0.3 ], "attack_params": { "target_score": 0.0, "max_queries": 500, "population_size": 15, "max_unsuccessful_iters": 15 }, "mutation_params": { "aug_char_p": 0.01 }, "max_str_len": 2000 }, "intentional_homoglyph_attack": { "run": false, "sample_size": 200, "severity_thresholds": [ 0.1, 0.2, 0.3 ], "attack_params": { "target_score": 0.0, "max_queries": 500, "population_size": 15, "max_unsuccessful_iters": 15 }, "mutation_params": { "aug_char_p": 0.01 }, "max_str_len": 2000 }, "confusable_homoglyph_attack": { "run": false, "sample_size": 200, "severity_thresholds": [ 0.1, 0.2, 0.3 ], "attack_params": { "target_score": 0.0, "max_queries": 500, "population_size": 15, "max_unsuccessful_iters": 15 }, "mutation_params": { "aug_char_p": 0.01 }, "max_str_len": 2000 }, "universal_triggers_attack": { "run": false, "sample_size": 15, "batch_size": 10, "severity_thresholds": [ 0.1, 0.2, 0.3 ], "attack_params": { "target_score": 0.0,
"max_queries": 500, "population_size": 1, "max_unsuccessful_iters": 400 }, "mutation_params": { "prefix_len": 10, "aug_word_p": 0.2 } }, "char_substitution_attack": { "run": true, "sample_size": 200, "severity_thresholds": [ 0.01, 0.05, 0.1 ], "max_str_len": 2000, "attack_params": { "min_char": 2, "aug_char_max": 1, "aug_char_min": 1, "aug_char_p": 0.3, "aug_word_min": 1, "aug_word_max": 10, "aug_word_p": 0.3 } }, "char_deletion_attack": { "run": true, "sample_size": 200, "severity_thresholds": [ 0.01, 0.05, 0.1 ], "max_str_len": 2000, "attack_params": { "min_char": 2, "aug_char_max": 1, "aug_char_min": 1, "aug_char_p": 0.3, "aug_word_min": 1, "aug_word_max": 10, "aug_word_p": 0.3 } }, "char_insertion_attack": { "run": true, "sample_size": 200, "severity_thresholds": [ 0.01, 0.05, 0.1 ], "max_str_len": 2000, "attack_params": { "min_char": 2, "aug_char_max": 1, "aug_char_min": 1, "aug_char_p": 0.3, "aug_word_min": 1, "aug_word_max": 10, "aug_word_p": 0.3 } }, "char_swap_attack": { "run": true, "sample_size": 200, "severity_thresholds": [ 0.01, 0.05, 0.1 ], "max_str_len": 2000, "attack_params": { "min_char": 2, "aug_char_max": 1, "aug_char_min": 1, "aug_char_p": 0.3, "aug_word_min": 1, "aug_word_max": 10, "aug_word_p": 0.3 } }, "keyboard_aug_attack": { "run": true, "sample_size": 200, "severity_thresholds": [ 0.01, 0.05, 0.1 ], "max_str_len": 2000, "attack_params": { "min_char": 2, "aug_char_max": 1, "aug_char_min": 1, "aug_char_p": 0.3, "aug_word_min": 1, "aug_word_max": 10, "aug_word_p": 0.3 } }, "common_misspelling_attack": { "run": true, "sample_size": 200, "severity_thresholds": [ 0.01, 0.05, 0.1 ], "max_str_len": 2000, "attack_params": null }, "ocr_error_attack": { "run": true, "sample_size": 200, "severity_thresholds": [ 0.01, 0.05, 0.1 ], "max_str_len": 2000, "attack_params": null }, "upper_case_text": { "run": true, "sample_size": 200, "severity_thresholds": [ 0.01, 0.05, 0.1 ], "max_str_len": 2000 }, "lower_case_text": { "run": true, "sample_size": 200,
"severity_thresholds": [ 0.01, 0.05, 0.1 ], "max_str_len": 2000 }, "remove_special_chars": { "run": true, "sample_size": 200, "severity_thresholds": [ 0.01, 0.05, 0.1 ], "max_str_len": 2000 }, "swap_masc_to_fem": { "run": true, "sample_size": 200, "severity_thresholds": [ 0.01, 0.05, 0.1 ], "max_str_len": 2000 }, "swap_fem_to_masc": { "run": true, "sample_size": 200, "severity_thresholds": [ 0.01, 0.05, 0.1 ], "max_str_len": 2000 }, "swap_fem_name_to_masc_name": { "run": true, "sample_size": 200, "severity_thresholds": [ 0.01, 0.05, 0.1 ], "max_str_len": 2000, "source": null, "target": null, "case_invariant": false }, "swap_masc_name_to_fem_name": { "run": true, "sample_size": 200, "severity_thresholds": [ 0.01, 0.05, 0.1 ], "max_str_len": 2000, "source": null, "target": null, "case_invariant": false }, "ascii": { "run": true, "sample_size": 200, "severity_thresholds": [ 0.01, 0.05, 0.1 ], "max_str_len": 2000 }, "label_subset": { "run": true, "metric_name": null, "num_subsets": 5, "min_subset_size": 10, "severity_thresholds": [ 0.05, 0.08, 0.13 ] }, "prediction_subset": { "run": true, "metric_name": null, "num_subsets": 5, "min_subset_size": 10, "severity_thresholds": [ 0.05, 0.08, 0.13 ] } }
```