### Named Entity Recognition Testing Config

Below is the default configuration for all Named Entity Recognition tests. A copy can also be found in your `rime_trial` bundle (at `nlp_examples/ner/default_test_config.json`).

```json
{
  "categories": [],
  "run_default": null,
  "custom_tests": null,
  "numeric_outlier": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null }, "min_normal_prop": 0.99, "baseline_quantile": 0.1, "perturb_multiplier": 1.0 },
  "unseen_categorical": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null } },
  "unseen_domain": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null } },
  "unseen_email": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null } },
  "unseen_url": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null } },
  "rare_categories": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null }, "include_columns": [], "min_num_occurrences": 0, "min_pct_occurrences": 0, "min_ratio_rel_uniform": 0.005 },
  "out_of_range": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null }, "std_factor": 3 },
  "int_feature_type": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null } },
  "float_feature_type": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null } },
  "str_feature_type": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null } },
  "bool_feature_type": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null } },
  "url_feature_type": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null } },
  "domain_feature_type": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null } },
  "email_feature_type": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null } },
  "req_characters": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null }, "column_specific_params": {} },
  "inconsistencies": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null }, "freq_ratio_threshold": 0.02, "min_correlation": 0.1, "max_pairwise_tests": 200, "max_unique_pairs_for_firewall": 15 },
  "null_check": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null } },
  "capitalization": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null } },
  "empty_string": { "exclude_columns": [], "run": true, "performance_change_config": { "num_samples_to_simulate": 100, "min_num_samples": 10, "ignore_errors": false, "severity_thresholds": null } },
  "feat_subset_auc": { "exclude_columns": [], "run": true, "min_sample_size": 20, "performance_change_thresholds": null },
  "feat_subset_accuracy": { "exclude_columns": [], "run": true, "min_sample_size": 20, "performance_change_thresholds": null },
  "feat_subset_f1": { "exclude_columns": [], "run": true, "min_sample_size": 20, "performance_change_thresholds": null },
  "feat_subset_macro_f1": { "exclude_columns": [], "run": true, "min_sample_size": 20, "performance_change_thresholds": null },
  "feat_subset_precision": { "exclude_columns": [], "run": true, "min_sample_size": 20, "performance_change_thresholds": null },
  "feat_subset_macro_precision": { "exclude_columns": [], "run": true, "min_sample_size": 20, "performance_change_thresholds": null },
  "feat_subset_fpr": { "exclude_columns": [], "run": true, "min_sample_size": 20, "performance_change_thresholds": null },
  "feat_subset_recall": { "exclude_columns": [], "run": true, "min_sample_size": 20, "performance_change_thresholds": null },
  "feat_subset_macro_recall": { "exclude_columns": [], "run": true, "min_sample_size": 20, "performance_change_thresholds": null },
  "feat_subset_pred_variance_pos": { "exclude_columns": [], "run": true, "min_sample_size": 20, "performance_change_thresholds": null },
  "feat_subset_pred_variance_neg": { "exclude_columns": [], "run": true, "min_sample_size": 20, "performance_change_thresholds": null },
  "feat_subset_pred_variance_all": { "exclude_columns": [], "run": true, "min_sample_size": 20, "performance_change_thresholds": null },
  "feat_subset_rmse": { "exclude_columns": [], "run": true, "min_sample_size": 20, "performance_change_thresholds": null },
  "feat_subset_mae": { "exclude_columns": [], "run": true, "min_sample_size": 20, "performance_change_thresholds": null },
  "feat_subset_rank_correlation": { "exclude_columns": [], "run": true, "min_sample_size": 20, "performance_change_thresholds": null },
  "feat_subset_ndcg": { "exclude_columns": [], "run": true, "min_sample_size": 20, "performance_change_thresholds": null },
  "feat_subset_mrr": { "exclude_columns": [], "run": true, "min_sample_size": 20, "performance_change_thresholds": null },
  "correlation_drift": { "exclude_columns": [], "run": true, "min_correlation": 0.1, "correlation_thresholds": [ 0.1, 0.2, 0.3 ], "p_value_threshold": 0.05, "max_pairwise_tests": 200 },
  "mutual_information_feat_drift": { "exclude_columns": [], "run": true, "min_mutual_information": 0.1, "mutual_information_thresholds": [ 0.1, 0.2, 0.3 ], "max_pairwise_tests": 200, "min_sample_size": 100 },
  "mutual_information_label_drift": { "exclude_columns": [], "run": true, "min_mutual_information": 0.1, "mutual_information_thresholds": [ 0.1, 0.2, 0.3 ], "max_pairwise_tests": 200, "min_sample_size": 100 },
  "categorical_label_drift": { "run": true, "drift_statistic": "Population Stability Index", "params": { "run": true, "num_values_for_graph": 5, "distance_thresholds": [ 0.2, 0.4, 0.6 ] } },
  "multiclass_pred_label_drift": { "run": true, "drift_statistic": "Population Stability Index", "params": { "run": true, "num_values_for_graph": 5, "distance_thresholds": [ 0.2, 0.4, 0.6 ] } },
  "regression_label_drift": { "run": true, "p_value_threshold": 0.05, "ks_stat_thresholds": [ 0.1, 0.33, 0.67 ] },
  "categorical_drift": { "exclude_columns": [], "run": true, "drift_statistic": "Population Stability Index", "params": { "run": true, "drift_scaling_factor": 0.005, "performance_change_thresholds": null, "min_sample_size": 100, "max_sample_size": null, "distance_threshold": 0.2 } },
  "null_proportion": { "exclude_columns": [], "run": true, "drift_scaling_factor": 0.005, "performance_change_thresholds": null, "p_value_threshold": 0.05, "min_sample_size": 100 },
  "row_null_proportion": { "exclude_columns": [], "run": true, "drift_statistic": "Population Stability Index", "params": { "exclude_columns": [], "run": true, "drift_scaling_factor": 0.005, "performance_change_thresholds": null, "psi_threshold": 0.2 } },
  "continuous_drift": { "exclude_columns": [], "run": true, "drift_scaling_factor": 0.005, "performance_change_thresholds": null, "drift_statistic": "Population Stability Index", "params": { "run": true, "drift_scaling_factor": 0.005, "performance_change_thresholds": null, "min_sample_size": 100, "min_num_quantiles": 1000, "distance_threshold": 0.2, "num_bins": 100 } },
  "overall_metrics": { "run": true, "metrics_specific_thresholds": {} },
  "prediction_drift": { "run": true, "drift_statistic": "Population Stability Index", "params": { "run": true, "min_sample_size": 100, "min_num_quantiles": 1000, "psi_thresholds": [ 0.2, 0.4, 0.6 ], "num_bins": 100 } },
  "calibration_comparison": { "run": true, "severity_level_thresholds": [ 0.02, 0.06, 0.1 ] },
  "global_sample_size": null,
  "metadata_tests": null,
  "unseen_unigram_abnormal_input": { "run": true, "severity_thresholds": [ 0.0, 0.05, 0.1 ], "performance_impact_config": { "ignore_observed_performance": false, "min_num_samples": 10, "severity_thresholds": [ 0.01, 0.05, 0.1 ] } },
  "empty_string_abnormal_input": { "run": true, "severity_thresholds": [ 0.0, 0.05, 0.1 ], "performance_impact_config": { "ignore_observed_performance": false, "min_num_samples": 10, "severity_thresholds": [ 0.01, 0.05, 0.1 ] } },
  "char_dist_drift": { "run": true, "drift_metrics": [ { "distance_metric": "Population Stability Index", "severity_threshold": [ 0.1, 0.2, 0.4 ] } ], "min_occurrences": 0, "model_impact_config": { "ignore_observed_performance": false, "min_num_samples": 10, "severity_thresholds": [ 0.01, 0.05, 0.1 ] } },
  "unigrams_drift": { "run": true, "drift_metrics": [ { "distance_metric": "Population Stability Index", "severity_threshold": [ 0.1, 0.2, 0.4 ] } ], "min_occurrences": 5, "model_impact_config": { "ignore_observed_performance": false, "min_num_samples": 10, "severity_thresholds": [ 0.01, 0.05, 0.1 ] } },
  "bigrams_drift": { "run": true, "drift_metrics": [ { "distance_metric": "Population Stability Index", "severity_threshold": [ 0.1, 0.2, 0.4 ] } ], "min_occurrences": 5, "model_impact_config": { "ignore_observed_performance": false, "min_num_samples": 10, "severity_thresholds": [ 0.01, 0.05, 0.1 ] } },
  "lower_case_entity": { "run": true, "sample_size": 200, "severity_thresholds": [ 0, 0.1, 0.3 ] },
  "upper_case_entity": { "run": true, "sample_size": 200, "severity_thresholds": [ 0, 0.1, 0.3 ] },
  "ampersand": { "run": true, "sample_size": 200, "severity_thresholds": [ 0, 0.1, 0.3 ] },
  "abbreviation": { "run": true, "sample_size": 200, "severity_thresholds": [ 0, 0.1, 0.3 ] },
  "white_space_special_chars": { "run": true, "sample_size": 200, "severity_thresholds": [ 0, 0.1, 0.3 ] },
  "ascii": { "run": true, "sample_size": 200, "severity_thresholds": [ 0, 0.1, 0.3 ] },
  "remove_special_chars": { "run": true, "sample_size": 200, "severity_thresholds": [ 0, 0.1, 0.3 ] },
  "swap_seen_entities": { "run": true, "sample_size": 200, "severity_thresholds": [ 0, 0.1, 0.3 ] },
  "num_entities_subset": { "run": true, "metric_name": null, "num_subsets": 5, "min_subset_size": 10, "severity_thresholds": [ 0.05, 0.08, 0.13 ] },
  "ground_truth_entity_type": { "run": true, "metric_name": null, "num_subsets": 5, "min_subset_size": 10, "severity_thresholds": [ 0.05, 0.08, 0.13 ] },
  "predicted_entity_type": { "run": true, "metric_name": null, "num_subsets": 5, "min_subset_size": 10, "severity_thresholds": [ 0.05, 0.08, 0.13 ] },
  "entity_types_drift": { "run": true, "drift_metrics": [ { "distance_metric": "Population Stability Index", "severity_threshold": [ 0.1, 0.2, 0.4 ] } ], "min_occurrences": 0, "model_impact_config": { "ignore_observed_performance": false, "min_num_samples": 10, "severity_thresholds": [ 0.01, 0.05, 0.1 ] } },
  "predicted_entity_types_drift": { "run": true, "drift_metrics": [ { "distance_metric": "Population Stability Index", "severity_threshold": [ 0.1, 0.2, 0.4 ] } ], "min_occurrences": 0, "model_impact_config": { "ignore_observed_performance": false, "min_num_samples": 10, "severity_thresholds": [ 0.01, 0.05, 0.1 ] } },
  "entity_lengths_drift": { "run": true, "drift_metrics": [ { "distance_metric": "Population Stability Index", "severity_threshold": [ 0.1, 0.2, 0.4 ] } ], "min_occurrences": 0, "model_impact_config": { "ignore_observed_performance": false, "min_num_samples": 10, "severity_thresholds": [ 0.01, 0.05, 0.1 ] }, "num_quantiles": 1001, "num_bins": 100 }
}
```