==437123== Memcheck, a memory error detector
==437123== Copyright (C) 2002-2024, and GNU GPL'd, by Julian Seward et al.
==437123== Using Valgrind-3.24.0 and LibVEX; rerun with -h for copyright info
==437123== Command: /data/blackswan/ripley/R/R-devel-vg/bin/exec/R --vanilla
==437123== 

R Under development (unstable) (2026-01-22 r89323) -- "Unsuffered Consequences"
Copyright (C) 2026 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

  Natural language support but running in an English locale

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> pkgname <- "OptimalBinningWoE"
> source(file.path(R.home("share"), "R", "examples-header.R"))
> options(warn = 1)
> library('OptimalBinningWoE')
> 
> base::assign(".oldSearch", base::search(), pos = 'CheckExEnv')
> base::assign(".old_wd", base::getwd(), pos = 'CheckExEnv')
> cleanEx()
> nameEx("control.obwoe")
> ### * control.obwoe
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: control.obwoe
> ### Title: Control Parameters for Optimal Binning Algorithms
> ### Aliases: control.obwoe
> 
> ### ** Examples
> 
> # Default control parameters
> ctrl_default <- control.obwoe()
> print(ctrl_default)
$bin_cutoff
[1] 0.05

$max_n_prebins
[1] 20

$convergence_threshold
[1] 1e-06

$max_iterations
[1] 1000

$bin_separator
[1] "%;%"

$verbose
[1] FALSE

attr(,"class")
[1] "obwoe_control"
> 
> # Conservative settings for production
> ctrl_production <- control.obwoe(
+   bin_cutoff = 0.03,
+   max_n_prebins = 30,
+   convergence_threshold = 1e-8,
+   max_iterations = 5000
+ )
> 
> # Aggressive settings for exploration
> ctrl_explore <- control.obwoe(
+   bin_cutoff = 0.01,
+   max_n_prebins = 50,
+   convergence_threshold = 1e-4,
+   max_iterations = 500
+ )
> 
> 
> 
> 
> cleanEx()
> nameEx("fit_logistic_regression")
> ### * fit_logistic_regression
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: fit_logistic_regression
> ### Title: Fit Logistic Regression Model
> ### Aliases: fit_logistic_regression
> 
> ### ** Examples
> 
> # Generate sample data
> set.seed(123)
> n <- 100
> p <- 3
> X <- matrix(rnorm(n * p), n, p)
> # Add intercept column
> X <- cbind(1, X)
> colnames(X) <- c("(Intercept)", "X1", "X2", "X3")
> 
> # True coefficients
> beta_true <- c(0.5, 1.2, -0.8, 0.3)
> 
> # Generate linear predictor
> eta <- X %*% beta_true
> 
> # Generate binary outcome
> prob <- 1 / (1 + exp(-eta))
> y <- rbinom(n, 1, prob)
> 
> # Fit logistic regression
> result <- fit_logistic_regression(X, y)
> 
> # View coefficients and statistics
> print(data.frame(
+   Coefficient = result$coefficients,
+   Std_Error = result$se,
+   Z_score = result$z_scores,
+   P_value = result$p_values
+ ))
  Coefficient Std_Error   Z_score     P_value
1   0.4677691 0.2424352  1.929460 0.053673775
2   1.2559101 0.3432823  3.658534 0.000253662
3  -0.7060973 0.2735459 -2.581275 0.009843607
4   0.5184608 0.2689094  1.928013 0.053853568
> 
> # Check convergence
> cat("Converged:", result$convergence, "\n")
Converged: TRUE 
> cat("Log-Likelihood:", result$loglikelihood, "\n")
Log-Likelihood: -51.99246 
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_apply_woe_cat")
> ### * ob_apply_woe_cat
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_apply_woe_cat
> ### Title: Apply Optimal Weight of Evidence (WoE) to a Categorical Feature
> ### Aliases: ob_apply_woe_cat
> 
> ### ** Examples
> 
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_apply_woe_num")
> ### * ob_apply_woe_num
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_apply_woe_num
> ### Title: Apply Optimal Weight of Evidence (WoE) to a Numerical Feature
> ### Aliases: ob_apply_woe_num
> 
> ### ** Examples
> 
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_categorical_cm")
> ### * ob_categorical_cm
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_categorical_cm
> ### Title: Optimal Binning for Categorical Variables using Enhanced
> ###   ChiMerge Algorithm
> ### Aliases: ob_categorical_cm
> 
> ### ** Examples
> 
> # Example 1: Basic usage with synthetic data
> set.seed(123)
> n <- 1000
> categories <- c("A", "B", "C", "D", "E", "F", "G", "H")
> feature <- sample(categories, n, replace = TRUE, prob = c(
+   0.2, 0.15, 0.15,
+   0.1, 0.1, 0.1,
+   0.1, 0.1
+ ))
> # Create target with some association to categories
> probs <- c(0.3, 0.4, 0.5, 0.6, 0.7, 0.75, 0.8, 0.85) # increasing probability
> target <- sapply(seq_along(feature), function(i) {
+   cat_idx <- which(categories == feature[i])
+   rbinom(1, 1, probs[cat_idx])
+ })
> 
> result <- ob_categorical_cm(feature, target)
> print(result[c("bin", "woe", "iv", "count")])
$bin
[1] "A"     "B"     "C%;%D" "E%;%G" "F%;%H"

$woe
[1] -1.18847425 -0.80239061 -0.01771012  0.87051615  1.37170669

$iv
[1] 2.801835e-01 1.052373e-01 7.275865e-05 1.299588e-01 3.041406e-01

$count
[1] 198 159 242 195 206

> 
> # View metadata
> print(paste("Total IV:", round(result$metadata$total_iv, 3)))
[1] "Total IV: 0.82"
> print(paste("Algorithm converged:", result$converged))
[1] "Algorithm converged: TRUE"
> 
> # Example 2: Using Chi2 algorithm for more conservative binning
> result_chi2 <- ob_categorical_cm(feature, target,
+   use_chi2_algorithm = TRUE,
+   max_bins = 6
+ )
> 
> # Compare number of bins
> cat("Standard ChiMerge bins:", result$metadata$n_bins, "\n")
Standard ChiMerge bins: 5 
> cat("Chi2 algorithm bins:", result_chi2$metadata$n_bins, "\n")
Chi2 algorithm bins: 6 
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_categorical_dmiv")
> ### * ob_categorical_dmiv
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_categorical_dmiv
> ### Title: Optimal Binning for Categorical Variables using Divergence
> ###   Measures
> ### Aliases: ob_categorical_dmiv
> 
> ### ** Examples
> 
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_categorical_dp")
> ### * ob_categorical_dp
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_categorical_dp
> ### Title: Optimal Binning for Categorical Variables using Dynamic
> ###   Programming
> ### Aliases: ob_categorical_dp
> 
> ### ** Examples
> 
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_categorical_fetb")
> ### * ob_categorical_fetb
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_categorical_fetb
> ### Title: Optimal Binning for Categorical Variables using Fisher's Exact
> ###   Test
> ### Aliases: ob_categorical_fetb
> 
> ### ** Examples
> 
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_categorical_gmb")
> ### * ob_categorical_gmb
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_categorical_gmb
> ### Title: Optimal Binning for Categorical Variables using Greedy Merge
> ###   Algorithm
> ### Aliases: ob_categorical_gmb
> 
> ### ** Examples
> 
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_categorical_ivb")
> ### * ob_categorical_ivb
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_categorical_ivb
> ### Title: Optimal Binning for Categorical Variables using Information
> ###   Value Dynamic Programming
> ### Aliases: ob_categorical_ivb
> 
> ### ** Examples
> 
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_categorical_jedi")
> ### * ob_categorical_jedi
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_categorical_jedi
> ### Title: Optimal Binning for Categorical Variables using JEDI Algorithm
> ### Aliases: ob_categorical_jedi
> 
> ### ** Examples
> 
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_categorical_jedi_mwoe")
> ### * ob_categorical_jedi_mwoe
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_categorical_jedi_mwoe
> ### Title: Optimal Binning for Categorical Variables with Multinomial
> ###   Target using JEDI-MWoE
> ### Aliases: ob_categorical_jedi_mwoe
> 
> ### ** Examples
> 
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_categorical_mba")
> ### * ob_categorical_mba
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_categorical_mba
> ### Title: Optimal Binning for Categorical Variables using Monotonic
> ###   Binning Algorithm
> ### Aliases: ob_categorical_mba
> 
> ### ** Examples
> 
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_categorical_milp")
> ### * ob_categorical_milp
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_categorical_milp
> ### Title: Optimal Binning for Categorical Variables using Heuristic
> ###   Algorithm
> ### Aliases: ob_categorical_milp
> 
> ### ** Examples
> 
> # Generate sample data
> set.seed(123)
> n <- 1000
> feature <- sample(letters[1:8], n, replace = TRUE)
> target <- rbinom(n, 1, prob = ifelse(feature %in% c("a", "b"), 0.7, 0.3))
> 
> # Perform optimal binning
> result <- ob_categorical_milp(feature, target)
> print(result[c("bin", "woe", "iv", "count")])
$bin
[1] "h"             "b%;%f%;%c%;%g" "a%;%d%;%e"    

$woe
[1] -0.55634710  0.02805836  0.14604873

$iv
[1] 0.0379899544 0.0003986034 0.0078405377

$count
[1] 132 505 363

> 
> # With custom parameters
> result2 <- ob_categorical_milp(
+   feature = feature,
+   target = target,
+   min_bins = 2,
+   max_bins = 4,
+   bin_cutoff = 0.03
+ )
> 
> # Handling missing values
> feature_with_na <- feature
> feature_with_na[sample(length(feature_with_na), 50)] <- NA
> result3 <- ob_categorical_milp(feature_with_na, target)
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_categorical_mob")
> ### * ob_categorical_mob
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_categorical_mob
> ### Title: Optimal Binning for Categorical Variables using Monotonic
> ###   Optimal Binning (MOB)
> ### Aliases: ob_categorical_mob
> 
> ### ** Examples
> 
> # Generate sample data
> set.seed(123)
> n <- 1000
> feature <- sample(letters[1:8], n, replace = TRUE)
> target <- rbinom(n, 1, prob = ifelse(feature %in% c("a", "b"), 0.7, 0.3))
> 
> # Perform optimal binning
> result <- ob_categorical_mob(feature, target)
> print(result[c("bin", "woe", "iv", "count")])
$bin
[1] "c%;%f" "e%;%h" "d%;%g" "a%;%b"

$woe
[1] -0.6160508 -0.4979681 -0.2902418  1.2813219

$iv
[1] 0.08435798 0.05880863 0.02029562 0.41586980

$count
[1] 242 253 249 256

> 
> # With custom parameters
> result2 <- ob_categorical_mob(
+   feature = feature,
+   target = target,
+   min_bins = 2,
+   max_bins = 4,
+   bin_cutoff = 0.03
+ )
> 
> # Handling missing values
> feature_with_na <- feature
> feature_with_na[sample(length(feature_with_na), 50)] <- NA
> result3 <- ob_categorical_mob(feature_with_na, target)
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_categorical_sab")
> ### * ob_categorical_sab
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_categorical_sab
> ### Title: Optimal Binning for Categorical Variables using Simulated
> ###   Annealing
> ### Aliases: ob_categorical_sab
> 
> ### ** Examples
> 
> # Generate sample data
> set.seed(123)
> n <- 1000
> feature <- sample(letters[1:8], n, replace = TRUE)
> target <- rbinom(n, 1, prob = ifelse(feature %in% c("a", "b"), 0.7, 0.3))
> 
> # Perform optimal binning
> result <- ob_categorical_sab(feature, target)
> print(result[c("bin", "woe", "iv", "count")])
$bin
[1] "c"         "h"         "d"         "a%;%f%;%g" "b%;%e"    

$woe
[1] -0.5641325 -0.5563471 -0.3871070  0.1162201  0.5039019

$iv
[1] 0.036063906 0.037989954 0.016337184 0.005226565 0.065028691

$count
[1] 122 132 114 383 249

> 
> # With custom parameters
> result2 <- ob_categorical_sab(
+   feature = feature,
+   target = target,
+   min_bins = 2,
+   max_bins = 4,
+   initial_temperature = 2.0,
+   cooling_rate = 0.99
+ )
> 
> # Handling missing values
> feature_with_na <- feature
> feature_with_na[sample(length(feature_with_na), 50)] <- NA
> result3 <- ob_categorical_sab(feature_with_na, target)
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_categorical_sblp")
> ### * ob_categorical_sblp
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_categorical_sblp
> ### Title: Optimal Binning for Categorical Variables using SBLP
> ### Aliases: ob_categorical_sblp
> 
> ### ** Examples
> 
> # Generate sample data
> set.seed(123)
> n <- 1000
> feature <- sample(letters[1:8], n, replace = TRUE)
> # Create a relationship where 'a' and 'b' have high probability
> target <- rbinom(n, 1, prob = ifelse(feature %in% c("a", "b"), 0.8, 0.2))
> 
> # Perform optimal binning
> result <- ob_categorical_sblp(feature, target)
> print(result[c("bin", "woe", "iv", "count")])
$bin
[1] "f"         "d%;%e"     "c%;%g%;%h" "a"         "b"        

$woe
[1] -1.1094145 -0.8567210 -0.6674771  1.8184450  2.2121155

$iv
[1] 0.1173476 0.1453568 0.1523956 0.4263670 0.5956975

$count
[1] 120 235 389 128 128

> 
> # Using a higher smoothing parameter (alpha)
> result_smooth <- ob_categorical_sblp(
+   feature = feature,
+   target = target,
+   alpha = 1.0
+ )
> 
> # Handling missing values
> feature_with_na <- feature
> feature_with_na[sample(length(feature_with_na), 50)] <- NA
> result_na <- ob_categorical_sblp(feature_with_na, target)
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_categorical_sketch")
> ### * ob_categorical_sketch
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_categorical_sketch
> ### Title: Optimal Binning for Categorical Variables using Sketch-based
> ###   Algorithm
> ### Aliases: ob_categorical_sketch
> 
> ### ** Examples
> 
> # Generate sample data
> set.seed(123)
> n <- 10000
> feature <- sample(letters, n, replace = TRUE, prob = c(rep(0.04, 13), rep(0.02, 13)))
> # Create a relationship where early letters have higher probability
> target_probs <- ifelse(as.numeric(factor(feature)) <= 10, 0.7, 0.3)
> target <- rbinom(n, 1, prob = target_probs)
> 
> # Perform sketch-based optimal binning
> result <- ob_categorical_sketch(feature, target)
> print(result[c("bin", "woe", "iv", "count")])
$bin
[1] "k%;%z%;%o%;%t"                 "u%;%v%;%y%;%l%;%m"            
[3] "n%;%p%;%x%;%w%;%r%;%s%;%q"     "h%;%j%;%f%;%g%;%b%;%d%;%c%;%i"
[5] "a%;%e"                        

$woe
[1] -1.0042752 -0.8308164 -0.8519467  0.8438619  0.8290098

$iv
[1] 0.12617874 0.12118256 0.11522899 0.27409018 0.06727459

$count
[1] 1354 1855 1682 4075 1034

> 
> # With custom sketch parameters for higher accuracy
> result_high_acc <- ob_categorical_sketch(
+   feature = feature,
+   target = target,
+   min_bins = 3,
+   max_bins = 7,
+   sketch_width = 4000,
+   sketch_depth = 7
+ )
> 
> # Handling missing values
> feature_with_na <- feature
> feature_with_na[sample(length(feature_with_na), 200)] <- NA
> result_na <- ob_categorical_sketch(feature_with_na, target)
> 
> 
> 
> cleanEx()
> nameEx("ob_categorical_swb")
> ### * ob_categorical_swb
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_categorical_swb
> ### Title: Optimal Binning for Categorical Variables using Sliding Window
> ###   Binning (SWB)
> ### Aliases: ob_categorical_swb
> 
> ### ** Examples
> 
> # Generate sample data with varying category frequencies
> set.seed(456)
> n <- 5000
> # Create categories with power-law frequency distribution
> categories <- c(
+   rep("A", 1500), rep("B", 1000), rep("C", 800),
+   rep("D", 500), rep("E", 300), rep("F", 200),
+   sample(letters[7:26], 700, replace = TRUE)
+ )
> feature <- sample(categories, n, replace = TRUE)
> # Create target with dependency on top categories
> target_probs <- ifelse(feature %in% c("A", "B"), 0.7,
+   ifelse(feature %in% c("C", "D"), 0.5, 0.3)
+ )
> target <- rbinom(n, 1, prob = target_probs)
> 
> # Perform sliding window binning
> result <- ob_categorical_swb(feature, target)
> print(result[c("bin", "woe", "iv", "count")])
$bin
[1] "E"                                                                                
[2] "i%;%g%;%y%;%r%;%q%;%l%;%t%;%z%;%w%;%n%;%F%;%k%;%j%;%x%;%m%;%p%;%h%;%u%;%s%;%v%;%o"
[3] "C%;%D"                                                                            
[4] "A"                                                                                
[5] "B"                                                                                

$woe
[1] -1.1552050 -1.0541862 -0.1830370  0.6332108  0.6365736

$iv
[1] 0.073747445 0.195525930 0.008783081 0.112135969 0.076546047

$count
[1]  291  916 1303 1486 1004

> 
> # With stricter bin limits
> result_strict <- ob_categorical_swb(
+   feature = feature,
+   target = target,
+   min_bins = 4,
+   max_bins = 6
+ )
> 
> # Handling missing values
> feature_with_na <- feature
> feature_with_na[sample(length(feature_with_na), 100)] <- NA
> result_na <- ob_categorical_swb(feature_with_na, target)
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_categorical_udt")
> ### * ob_categorical_udt
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_categorical_udt
> ### Title: Optimal Binning for Categorical Variables using a User-Defined
> ###   Technique (UDT)
> ### Aliases: ob_categorical_udt
> 
> ### ** Examples
> 
> # Generate sample data with skewed category distribution
> set.seed(789)
> n <- 3000
> # Power-law distributed categories
> categories <- c(
+   rep("X1", 1200), rep("X2", 800), rep("X3", 400),
+   sample(LETTERS[4:20], 600, replace = TRUE)
+ )
> feature <- sample(categories, n, replace = TRUE)
> # Target probabilities based on category importance
> probs <- ifelse(grepl("X", feature), 0.7,
+   ifelse(grepl("[A-C]", feature), 0.5, 0.3)
+ )
> target <- rbinom(n, 1, prob = probs)
> 
> # Perform user-defined technique binning
> result <- ob_categorical_udt(feature, target)
> print(result[c("bin", "woe", "iv", "count")])
$bin
[1] "K%;%S%;%H%;%F%;%O%;%T%;%I%;%G%;%M%;%P%;%L%;%E%;%Q%;%R"
[2] "N%;%J%;%D"                                            
[3] "X3"                                                   
[4] "X2"                                                   
[5] "X1"                                                   

$woe
[1] -1.3704280 -1.2269348  0.2610244  0.3379009  0.3845215

$iv
[1] 0.322320167 0.037475936 0.009463915 0.029143017 0.055138785

$count
[1]  513   73  431  802 1181

> 
> # Adjust parameters for finer control
> result_custom <- ob_categorical_udt(
+   feature = feature,
+   target = target,
+   min_bins = 2,
+   max_bins = 7,
+   bin_cutoff = 0.03
+ )
> 
> # Handling missing values
> feature_with_na <- feature
> feature_with_na[sample(length(feature_with_na), 150)] <- NA
> result_na <- ob_categorical_udt(feature_with_na, target)
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_check_distincts")
> ### * ob_check_distincts
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_check_distincts
> ### Title: Check Distinct Length
> ### Aliases: ob_check_distincts
> ### Keywords: internal
> 
> ### ** Examples
> 
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_cutpoints_cat")
> ### * ob_cutpoints_cat
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_cutpoints_cat
> ### Title: Binning Categorical Variables using Custom Cutpoints
> ### Aliases: ob_cutpoints_cat
> 
> ### ** Examples
> 
> # Sample data
> feature <- c("A", "B", "C", "D", "A", "B", "C", "D")
> target <- c(1, 0, 1, 0, 1, 1, 0, 0)
> 
> # Define custom bins: (A,B) and (C,D)
> cutpoints <- c("A+B", "C+D")
> 
> # Apply binning
> result <- ob_cutpoints_cat(feature, target, cutpoints)
> 
> # View bin statistics
> print(result$woebin)
  bin count count_pos count_neg       woe        iv
1 A+B     4         3         1  1.098612 0.5493061
2 C+D     4         1         3 -1.098612 0.5493061
> 
> # View WoE-transformed feature
> print(result$woefeature)
[1]  1.098612  1.098612 -1.098612 -1.098612  1.098612  1.098612 -1.098612
[8] -1.098612
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_cutpoints_num")
> ### * ob_cutpoints_num
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_cutpoints_num
> ### Title: Binning Numerical Variables using Custom Cutpoints
> ### Aliases: ob_cutpoints_num
> 
> ### ** Examples
> 
> # Sample data
> feature <- c(5, 15, 25, 35, 45, 55, 65, 75)
> target <- c(0, 0, 1, 1, 1, 1, 0, 0)
> 
> # Define custom cutpoints
> cutpoints <- c(30, 60)
> 
> # Apply binning
> result <- ob_cutpoints_num(feature, target, cutpoints)
> 
> # View bin statistics
> print(result$woebin)
            bin count count_pos count_neg        woe        iv
1  [-Inf;30.00)     3         1         2 -0.6931472 0.1732868
2 [30.00;60.00)     3         3         0  8.9226583 6.6911015
3  [60.00;+Inf]     2         0         2 -8.5171932 4.2577449
> 
> # View WoE-transformed feature
> print(result$woefeature)
[1] -0.6931472 -0.6931472 -0.6931472  8.9226583  8.9226583  8.9226583 -8.5171932
[8] -8.5171932
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_gains_table")
> ### * ob_gains_table
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_gains_table
> ### Title: Compute Comprehensive Gains Table from Binning Results
> ### Aliases: ob_gains_table
> 
> ### ** Examples
> 
> # Manually constructed binning result
> bin_res <- list(
+   id = 1:3,
+   bin = c("Low", "Medium", "High"),
+   count = c(100, 200, 50),
+   count_pos = c(5, 30, 20),
+   count_neg = c(95, 170, 30)
+ )
> 
> gt <- ob_gains_table(bin_res)
> print(gt[, c("bin", "woe", "iv", "ks")])
     bin         woe          iv        ks
1    Low -1.26479681 0.292325919 0.2311248
2 Medium -0.05495888 0.001693648 0.2619414
3   High  1.27417706 0.333759785 0.0000000
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_gains_table_feature")
> ### * ob_gains_table_feature
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_gains_table_feature
> ### Title: Compute Gains Table for a Binned Feature Vector
> ### Aliases: ob_gains_table_feature
> 
> ### ** Examples
> 
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_numerical_bb")
> ### * ob_numerical_bb
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_numerical_bb
> ### Title: Optimal Binning for Numerical Variables using Branch and Bound
> ###   Algorithm
> ### Aliases: ob_numerical_bb
> 
> ### ** Examples
> 
> # Example: Binning a variable with a sigmoid relationship to target
> set.seed(123)
> n <- 1000
> # Generate feature
> feature <- rnorm(n)
> 
> # Generate target based on logistic probability
> prob <- 1 / (1 + exp(-2 * feature))
> target <- rbinom(n, 1, prob)
> 
> # Perform Optimal Binning
> result <- ob_numerical_bb(feature, target,
+   min_bins = 3,
+   max_bins = 5,
+   is_monotonic = TRUE
+ )
> 
> # Check results
> print(data.frame(
+   Bin = result$bin,
+   Count = result$count,
+   WoE = round(result$woe, 4),
+   IV = round(result$iv, 4)
+ ))
                   Bin Count     WoE     IV
1     (-Inf;-1.622584]    50 -3.4487 0.3213
2 (-1.622584;0.841413]   750 -0.3461 0.0882
3  (0.841413;1.254752]   100  1.9167 0.2916
4  (1.254752;1.676134]    50  3.5443 0.3473
5      (1.676134;+Inf]    50  3.5443 0.3473
> 
> cat("Total IV:", result$total_iv, "\n")
Total IV: 1.395824 
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_numerical_cm")
> ### * ob_numerical_cm
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_numerical_cm
> ### Title: Optimal Binning for Numerical Variables using Enhanced ChiMerge
> ###   Algorithm
> ### Aliases: ob_numerical_cm
> 
> ### ** Examples
> 
> # Example 1: Standard ChiMerge
> set.seed(123)
> feature <- rnorm(1000)
> # Create a target with a relationship to the feature
> target <- rbinom(1000, 1, plogis(2 * feature))
> 
> res_cm <- ob_numerical_cm(feature, target,
+   min_bins = 3,
+   max_bins = 6,
+   init_method = "equal_frequency"
+ )
> 
> print(res_cm$bin)
[1] "(-Inf;0.664416]"     "(0.665160;0.840540]" "(0.844904;1.253815]"
[4] "(1.263185;1.675697]" "(1.684436;+Inf]"    
> print(res_cm$iv)
[1] 0.2381152 0.1010034 0.2925531 0.3484273 0.3484273
> 
> # Example 2: Using the Chi2 Algorithm variant
> res_chi2 <- ob_numerical_cm(feature, target,
+   min_bins = 3,
+   max_bins = 6,
+   use_chi2_algorithm = TRUE
+ )
==437123== Invalid read of size 8
==437123==    at 0x245C6695: OBN_CM::calculate_inconsistency_rate() const (packages/tests-vg/OptimalBinningWoE/src/OBN_CM_v5.cpp:868)
==437123==    by 0x245CC59E: OBN_CM::chi2_algorithm() (packages/tests-vg/OptimalBinningWoE/src/OBN_CM_v5.cpp:830)
==437123==    by 0x245CD19E: OBN_CM::fit() (packages/tests-vg/OptimalBinningWoE/src/OBN_CM_v5.cpp:1252)
==437123==    by 0x245C5C5C: optimal_binning_numerical_cm(Rcpp::Vector<13, Rcpp::PreserveStorage>, Rcpp::Vector<14, Rcpp::PreserveStorage>, int, int, double, int, double, int, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, double, bool) (packages/tests-vg/OptimalBinningWoE/src/OBN_CM_v5.cpp:1444)
==437123==    by 0x246BCD14: _OptimalBinningWoE_optimal_binning_numerical_cm (packages/tests-vg/OptimalBinningWoE/src/RcppExports.cpp:365)
==437123==    by 0x4A7CD3: R_doDotCall (svn/R-devel/src/main/dotcode.c:801)
==437123==    by 0x4A8363: do_dotcall (svn/R-devel/src/main/dotcode.c:1437)
==437123==    by 0x4E3FCC: bcEval_loop (svn/R-devel/src/main/eval.c:8132)
==437123==    by 0x4F1FD7: bcEval (svn/R-devel/src/main/eval.c:7515)
==437123==    by 0x4F1FD7: bcEval (svn/R-devel/src/main/eval.c:7500)
==437123==    by 0x4F230A: Rf_eval (svn/R-devel/src/main/eval.c:1167)
==437123==    by 0x4F408D: R_execClosure (svn/R-devel/src/main/eval.c:2389)
==437123==    by 0x4F4D46: applyClosure_core (svn/R-devel/src/main/eval.c:2302)
==437123==  Address 0x1efa2000 is 8 bytes after a block of size 7,960 alloc'd
==437123==    at 0x4843866: malloc (/builddir/build/BUILD/valgrind-3.24.0/coregrind/m_replacemalloc/vg_replace_malloc.c:446)
==437123==    by 0x534A8E: GetNewPage (svn/R-devel/src/main/memory.c:998)
==437123==    by 0x536B6E: Rf_allocVector3 (svn/R-devel/src/main/memory.c:2872)
==437123==    by 0x4E556E: Rf_allocVector (svn/R-devel/src/include/Rinlinedfuns.h:609)
==437123==    by 0x4E556E: bcEval_loop (svn/R-devel/src/main/eval.c:7660)
==437123==    by 0x4F1FD7: bcEval (svn/R-devel/src/main/eval.c:7515)
==437123==    by 0x4F1FD7: bcEval (svn/R-devel/src/main/eval.c:7500)
==437123==    by 0x4F230A: Rf_eval (svn/R-devel/src/main/eval.c:1167)
==437123==    by 0x4F408D: R_execClosure (svn/R-devel/src/main/eval.c:2389)
==437123==    by 0x4F4D46: applyClosure_core (svn/R-devel/src/main/eval.c:2302)
==437123==    by 0x4F2415: Rf_applyClosure (svn/R-devel/src/main/eval.c:2324)
==437123==    by 0x4F2415: Rf_eval (svn/R-devel/src/main/eval.c:1280)
==437123==    by 0x528DCB: Rf_ReplIteration (svn/R-devel/src/main/main.c:264)
==437123==    by 0x5291AF: R_ReplConsole (svn/R-devel/src/main/main.c:317)
==437123==    by 0x529244: run_Rmainloop (svn/R-devel/src/main/main.c:1235)
==437123== 
> 
> cat("Total IV (ChiMerge):", res_cm$total_iv, "\n")
Total IV (ChiMerge): 1.328526 
> cat("Total IV (Chi2):", res_chi2$total_iv, "\n")
Total IV (Chi2): 1.328526 
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_numerical_dmiv")
> ### * ob_numerical_dmiv
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_numerical_dmiv
> ### Title: Optimal Binning using Metric Divergence Measures (Zeng, 2013)
> ### Aliases: ob_numerical_dmiv
> 
> ### ** Examples
> 
> # Example using the "he" (Hellinger) distance
> set.seed(123)
> feature <- rnorm(1000)
> target <- rbinom(1000, 1, plogis(feature))
> 
> result <- ob_numerical_dmiv(feature, target,
+   min_bins = 3,
+   max_bins = 5,
+   divergence_method = "he",
+   bin_method = "woe"
+ )
> 
> print(result$bin)
[1] "(-Inf;-1.622584]"      "(-1.622584;-1.049677]" "(-1.049677;0.664602]" 
[4] "(0.664602;1.254752]"   "(1.254752;+Inf]"      
> print(result$divergence)
[1] 0.013425252 0.017370581 0.002093325 0.011379313 0.039855640
> print(paste("Total Hellinger Distance:", round(result$total_divergence, 4)))
[1] "Total Hellinger Distance: 0.0841"
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_numerical_dp")
> ### * ob_numerical_dp
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_numerical_dp
> ### Title: Optimal Binning for Numerical Variables using Dynamic
> ###   Programming
> ### Aliases: ob_numerical_dp
> 
> ### ** Examples
> 
> # Example: forcing a descending trend
> set.seed(123)
> feature <- runif(1000, 0, 100)
> # Target has a complex relationship, but we want to force a linear view
> target <- rbinom(1000, 1, 0.5 + 0.003 * feature) # slightly positive trend
> 
> # Force "descending" (even if data suggests ascending) to see enforcement
> result <- ob_numerical_dp(feature, target,
+   min_bins = 3,
+   max_bins = 5,
+   monotonic_trend = "descending"
+ )
> 
> print(result$bin)
[1] "(-Inf;89.189612]"      "(89.189612;95.310121]" "(95.310121;+Inf]"     
> print(result$woe) # Should be strictly decreasing
[1] -0.05783551  0.64413073  0.53267052
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_numerical_ewb")
> ### * ob_numerical_ewb
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_numerical_ewb
> ### Title: Hybrid Optimal Binning using Equal-Width Initialization and IV
> ###   Optimization
> ### Aliases: ob_numerical_ewb
> 
> ### ** Examples
> 
> # Example 1: Uniform distribution (Ideal for Equal-Width)
> set.seed(123)
> feature <- runif(1000, 0, 100)
> target <- rbinom(1000, 1, plogis(0.05 * feature - 2))
> 
> res_ewb <- ob_numerical_ewb(feature, target, max_bins = 5)
> print(res_ewb$bin)
[1] "(-Inf;5.041231]"       "(5.041231;30.014710]"  "(30.014710;54.988190]"
[4] "(54.988190;79.961669]" "(79.961669;+Inf]"     
> print(paste("Total IV:", round(res_ewb$total_iv, 4)))
[1] "Total IV: 1.6104"
> 
> # Example 2: Effect of Outliers (The weakness of Equal-Width)
> feature_outlier <- c(feature, 10000) # One extreme outlier
> target_outlier <- c(target, 0)
> 
> # Note: The algorithm tries to recover, but the initial split is distorted
> res_outlier <- ob_numerical_ewb(feature_outlier, target_outlier, max_bins = 5)
> print(res_outlier$bin)
[1] "(-Inf;500.044208]"        "(500.044208;9500.002327]"
[3] "(9500.002327;+Inf]"      
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_numerical_fast_mdlp")
> ### * ob_numerical_fast_mdlp
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_numerical_fast_mdlp
> ### Title: Optimal Binning using MDLP with Monotonicity Constraints
> ### Aliases: ob_numerical_fast_mdlp
> 
> ### ** Examples
> 
> # Example: Standard usage with monotonicity
> set.seed(123)
> feature <- rnorm(1000)
> target <- rbinom(1000, 1, plogis(2 * feature)) # Positive relationship
> 
> result <- ob_numerical_fast_mdlp(feature, target,
+   min_bins = 3,
+   max_bins = 6,
+   force_monotonicity = TRUE
+ )
> 
> print(result$bin)
[1] "(-Inf;-1.185289]"      "(-1.185289;-0.506334]" "(-0.506334;0.299594]" 
[4] "(0.299594;1.214589]"   "(1.214589;+Inf]"      
> print(result$woe) # Should show a monotonic trend
[1] -3.4659526 -1.6251167 -0.2908404  1.3817919  3.8181822
> 
> # Example: Disabling monotonicity for exploratory analysis
> result_no_mono <- ob_numerical_fast_mdlp(feature, target,
+   min_bins = 3,
+   max_bins = 6,
+   force_monotonicity = FALSE
+ )
> 
> print(result_no_mono$woe) # May show non-monotonic patterns
[1] -3.4659526 -1.6251167 -0.2908404  1.3817919  3.8181822
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_numerical_fetb")
> ### * ob_numerical_fetb
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_numerical_fetb
> ### Title: Optimal Binning using Fisher's Exact Test
> ### Aliases: ob_numerical_fetb
> 
> ### ** Examples
> 
> # Example: Binning a small dataset where Fisher's Exact Test excels
> set.seed(123)
> feature <- rnorm(100)
> target <- rbinom(100, 1, 0.2)
> 
> result <- ob_numerical_fetb(feature, target,
+   min_bins = 2,
+   max_bins = 4,
+   max_n_prebins = 10
+ )
> 
> print(result$bin)
[1] "(-inf; -1.06782]"    "(-1.06782; 1.36065]" "(1.36065; inf]"     
> print(result$woe)
[1] -1.6521490  0.1035344  0.1685980
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_numerical_ir")
> ### * ob_numerical_ir
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_numerical_ir
> ### Title: Optimal Binning using Isotonic Regression (PAVA)
> ### Aliases: ob_numerical_ir
> 
> ### ** Examples
> 
> # Example: Forcing a monotonic WoE trend
> set.seed(123)
> feature <- rnorm(500)
> # Create a slightly noisy but generally increasing relationship
> prob <- plogis(0.5 * feature + rnorm(500, 0, 0.3))
> target <- rbinom(500, 1, prob)
> 
> result <- ob_numerical_ir(feature, target,
+   min_bins = 4,
+   max_bins = 6,
+   auto_monotonicity = TRUE
+ )
> 
> print(result$bin)
[1] "(-Inf;-0.945409]"      "(-0.945409;-0.388780]" "(-0.388780;0.020451]" 
[4] "(0.020451;0.418982]"   "(0.418982;0.976973]"   "(0.976973;+Inf]"      
> print(round(result$woe, 3))
[1] -0.339 -0.070 -0.096 -0.096  0.071  0.532
> print(paste("Monotonic Increasing:", result$monotone_increasing))
[1] "Monotonic Increasing: TRUE"
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_numerical_jedi")
> ### * ob_numerical_jedi
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_numerical_jedi
> ### Title: Optimal Binning using Joint Entropy-Driven Interval
> ###   Discretization (JEDI)
> ### Aliases: ob_numerical_jedi
> 
> ### ** Examples
> 
> # Example: Binning a variable with a complex relationship
> set.seed(123)
> feature <- rnorm(1000)
> # Target probability has a quadratic component (non-monotonic)
> # JEDI will try to force a monotonic approximation that maximizes IV
> target <- rbinom(1000, 1, plogis(0.5 * feature + 0.1 * feature^2))
> 
> result <- ob_numerical_jedi(feature, target,
+   min_bins = 3,
+   max_bins = 6,
+   max_n_prebins = 20
+ )
> 
> print(result$bin)
[1] "(-Inf;-1.052513]"      "(-1.052513;-0.097412]" "(-0.097412;0.840540]" 
[4] "(0.840540;1.253815]"   "(1.253815;1.675697]"   "(1.675697;+Inf]"      
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_numerical_jedi_mwoe")
> ### * ob_numerical_jedi_mwoe
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_numerical_jedi_mwoe
> ### Title: Optimal Binning for Multiclass Targets using JEDI M-WOE
> ### Aliases: ob_numerical_jedi_mwoe
> 
> ### ** Examples
> 
> # Example: Multiclass target (0, 1, 2)
> set.seed(123)
> feature <- rnorm(1000)
> # Class 0: low feature, Class 1: medium, Class 2: high
> target <- cut(feature + rnorm(1000, 0, 0.5),
+   breaks = c(-Inf, -0.5, 0.5, Inf),
+   labels = FALSE
+ ) - 1
> 
> result <- ob_numerical_jedi_mwoe(feature, target,
+   min_bins = 3,
+   max_bins = 5
+ )
> 
> # Check WoE for Class 2 (High values)
> print(result$woe[, 3]) # Column 3 corresponds to Class 2
[1] -20.4501899 -20.4501899 -20.4501899 -20.4501899   0.3630006
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_numerical_kmb")
> ### * ob_numerical_kmb
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_numerical_kmb
> ### Title: Optimal Binning using K-means Inspired Initialization (KMB)
> ### Aliases: ob_numerical_kmb
> 
> ### ** Examples
> 
> # Example: Comparing KMB with EWB on uniform data
> set.seed(123)
> feature <- runif(1000, 0, 100)
> target <- rbinom(1000, 1, plogis(0.02 * feature))
> 
> result_kmb <- ob_numerical_kmb(feature, target, max_bins = 5)
> print(result_kmb$bin)
[1] "(-Inf;20.025318]"      "(20.025318;40.004102]" "(40.004102;59.982886]"
[4] "(59.982886;79.961669]" "(79.961669;+Inf]"     
> print(paste("KMB Total IV:", round(result_kmb$total_iv, 4)))
[1] "KMB Total IV: 0.4792"
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_numerical_ldb")
> ### * ob_numerical_ldb
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_numerical_ldb
> ### Title: Optimal Binning for Numerical Variables using Local Density
> ###   Binning
> ### Aliases: ob_numerical_ldb
> 
> ### ** Examples
> 
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_numerical_lpdb")
> ### * ob_numerical_lpdb
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_numerical_lpdb
> ### Title: Optimal Binning using Local Polynomial Density Binning (LPDB)
> ### Aliases: ob_numerical_lpdb
> 
> ### ** Examples
> 
> # Example: Binning a tri-modal distribution
> set.seed(123)
> # Feature with three distinct clusters
> feature <- c(rnorm(300, mean = -3), rnorm(400, mean = 0), rnorm(300, mean = 3))
> # Target depends on these clusters
> target <- rbinom(1000, 1, plogis(feature))
> 
> result <- ob_numerical_lpdb(feature, target,
+   min_bins = 3,
+   max_bins = 5
+ )
> 
> print(result$bin) # Should ideally find cuts near -1.5 and 1.5
[1] "(-Inf; -1.851192]"     "(-1.851192; 0.120719]" "(0.120719; 1.985886]" 
[4] "(1.985886; 3.046734]"  "(3.046734; +Inf]"     
> print(result$monotonicity)
[1] "increasing"
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_numerical_mblp")
> ### * ob_numerical_mblp
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_numerical_mblp
> ### Title: Optimal Binning for Numerical Features Using Monotonic Binning
> ###   via Linear Programming
> ### Aliases: ob_numerical_mblp
> 
> ### ** Examples
> 
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_numerical_mdlp")
> ### * ob_numerical_mdlp
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_numerical_mdlp
> ### Title: Optimal Binning for Numerical Features using Minimum Description
> ###   Length Principle
> ### Aliases: ob_numerical_mdlp
> 
> ### ** Examples
> 
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_numerical_mob")
> ### * ob_numerical_mob
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_numerical_mob
> ### Title: Optimal Binning for Numerical Features using Monotonic Optimal
> ###   Binning
> ### Aliases: ob_numerical_mob
> 
> ### ** Examples
> 
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_numerical_mrblp")
> ### * ob_numerical_mrblp
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_numerical_mrblp
> ### Title: Optimal Binning for Numerical Features using Monotonic Risk
> ###   Binning with Likelihood Ratio Pre-binning
> ### Aliases: ob_numerical_mrblp
> 
> ### ** Examples
> 
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_numerical_oslp")
> ### * ob_numerical_oslp
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_numerical_oslp
> ### Title: Optimal Binning for Numerical Variables using Optimal Supervised
> ###   Learning Partitioning
> ### Aliases: ob_numerical_oslp
> 
> ### ** Examples
> 
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_numerical_sketch")
> ### * ob_numerical_sketch
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_numerical_sketch
> ### Title: Optimal Binning for Numerical Variables using Sketch-based
> ###   Algorithm
> ### Aliases: ob_numerical_sketch
> 
> ### ** Examples
> 
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_numerical_ubsd")
> ### * ob_numerical_ubsd
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_numerical_ubsd
> ### Title: Optimal Binning for Numerical Variables using Unsupervised
> ###   Binning with Standard Deviation
> ### Aliases: ob_numerical_ubsd
> 
> ### ** Examples
> 
> 
> 
> 
> 
> cleanEx()
> nameEx("ob_preprocess")
> ### * ob_preprocess
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: ob_preprocess
> ### Title: Data Preprocessor for Optimal Binning
> ### Aliases: ob_preprocess
> 
> ### ** Examples
> 
> 
> 
> 
> 
> cleanEx()
> nameEx("obcorr")
> ### * obcorr
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: obcorr
> ### Title: Compute Multiple Robust Correlations Between Numeric Variables
> ### Aliases: obcorr
> 
> ### ** Examples
> 
> # Create sample data
> set.seed(123)
> n <- 100
> df <- data.frame(
+   x1 = rnorm(n),
+   x2 = rnorm(n),
+   x3 = rt(n, df = 3), # Heavy-tailed distribution
+   x4 = sample(c(0, 1), n, replace = TRUE), # Binary variable
+   category = sample(letters[1:3], n, replace = TRUE) # Non-numeric column
+ )
> 
> # Add some relationships
> df$x2 <- df$x1 + rnorm(n, 0, 0.5)
> df$x3 <- df$x1^2 + rnorm(n, 0, 0.5)
> 
> # Compute all correlations
> result_all <- obcorr(df)
> head(result_all)
   x  y     pearson    spearman      kendall    hoeffding  distance    biweight
1 x1 x2  0.86981390  0.87913591  0.700202020  0.397341844 1.4207684  0.87255023
2 x1 x3  0.09556132  0.06517852  0.048080808  0.047496185 0.7460724  0.11266947
3 x1 x4  0.11946052  0.11432107  0.093808315 -0.005827769 0.2600756  0.11946052
4 x2 x3  0.10385820  0.07159916  0.050505051  0.016285581 0.5003787  0.15306172
5 x2 x4 -0.02020361 -0.01039282 -0.008528029 -0.008435515 0.1420518 -0.02020361
6 x3 x4 -0.09009510 -0.06720693 -0.055147919 -0.006921340 0.1976969 -0.09009510
        pbend
1  0.88270163
2  0.06725434
3  0.09814141
4  0.08868743
5 -0.01342052
6 -0.04556955
> 
> # Compute only robust correlations
> result_robust <- obcorr(df, method = "robust")
> 
> # Compute only Pearson correlation with 2 threads
> result_pearson <- obcorr(df, method = "pearson", threads = 2)
> 
> 
> 
> 
> cleanEx()
> nameEx("obwoe")
> ### * obwoe
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: obwoe
> ### Title: Unified Optimal Binning and Weight of Evidence Transformation
> ### Aliases: obwoe
> 
> ### ** Examples
> 
> 
> 
> 
> 
> cleanEx()
> nameEx("obwoe_algorithm")
> ### * obwoe_algorithm
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: obwoe_algorithm
> ### Title: Binning Algorithm Parameter
> ### Aliases: obwoe_algorithm
> 
> ### ** Examples
> 
> # Default: all algorithms
> obwoe_algorithm()
Binning Algorithm (qualitative)
29 possible values include:
'auto', 'jedi', 'jedi_mwoe', 'cm', 'dp', 'dmiv', 'fetb', 'mob', 'sketch',
'udt', 'gmb', 'ivb', 'mba', 'milp', 'sab', 'sblp', 'swb', 'bb', …, 'oslp', and
'ubsd'
> 
> # Restrict to universal algorithms for mixed data
> obwoe_algorithm(values = c("jedi", "mob", "dp", "cm"))
Binning Algorithm (qualitative)
4 possible values include:
'jedi', 'mob', 'dp', and 'cm'
> 
> # Numerical-only algorithms
> obwoe_algorithm(values = c("mdlp", "fast_mdlp", "ewb", "ir"))
Binning Algorithm (qualitative)
4 possible values include:
'mdlp', 'fast_mdlp', 'ewb', and 'ir'
> 
> 
> 
> 
> cleanEx()
> nameEx("obwoe_algorithms")
> ### * obwoe_algorithms
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: obwoe_algorithms
> ### Title: List Available Algorithms
> ### Aliases: obwoe_algorithms
> 
> ### ** Examples
> 
> obwoe_algorithms()
   algorithm numerical categorical multinomial
1         cm      TRUE        TRUE       FALSE
2       dmiv      TRUE        TRUE       FALSE
3         dp      TRUE        TRUE       FALSE
4       fetb      TRUE        TRUE       FALSE
5       jedi      TRUE        TRUE       FALSE
6  jedi_mwoe      TRUE        TRUE        TRUE
7        mob      TRUE        TRUE       FALSE
8     sketch      TRUE        TRUE       FALSE
9        udt      TRUE        TRUE       FALSE
10       gmb     FALSE        TRUE       FALSE
11       ivb     FALSE        TRUE       FALSE
12       mba     FALSE        TRUE       FALSE
13      milp     FALSE        TRUE       FALSE
14       sab     FALSE        TRUE       FALSE
15      sblp     FALSE        TRUE       FALSE
16       swb     FALSE        TRUE       FALSE
17        bb      TRUE       FALSE       FALSE
18       ewb      TRUE       FALSE       FALSE
19 fast_mdlp      TRUE       FALSE       FALSE
20        ir      TRUE       FALSE       FALSE
21       kmb      TRUE       FALSE       FALSE
22       ldb      TRUE       FALSE       FALSE
23      lpdb      TRUE       FALSE       FALSE
24      mblp      TRUE       FALSE       FALSE
25      mdlp      TRUE       FALSE       FALSE
26     mrblp      TRUE       FALSE       FALSE
27      oslp      TRUE       FALSE       FALSE
28      ubsd      TRUE       FALSE       FALSE
> 
> 
> 
> 
> cleanEx()
> nameEx("obwoe_apply")
> ### * obwoe_apply
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: obwoe_apply
> ### Title: Apply Weight of Evidence Transformations to New Data
> ### Aliases: obwoe_apply
> 
> ### ** Examples
> 
> 
> 
> 
> 
> cleanEx()
> nameEx("obwoe_bin_cutoff")
> ### * obwoe_bin_cutoff
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: obwoe_bin_cutoff
> ### Title: Bin Cutoff Parameter
> ### Aliases: obwoe_bin_cutoff
> 
> ### ** Examples
> 
> obwoe_bin_cutoff()
Bin Support Cutoff (quantitative)
Range: [0.01, 0.1]
> obwoe_bin_cutoff(range = c(0.02, 0.08))
Bin Support Cutoff (quantitative)
Range: [0.02, 0.08]
> 
> 
> 
> 
> cleanEx()
> nameEx("obwoe_gains")
> ### * obwoe_gains
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: obwoe_gains
> ### Title: Gains Table Statistics for Credit Risk Scorecard Evaluation
> ### Aliases: obwoe_gains
> 
> ### ** Examples
> 
> 
> 
> 
> cleanEx()
> nameEx("obwoe_max_bins")
> ### * obwoe_max_bins
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: obwoe_max_bins
> ### Title: Maximum Bins Parameter
> ### Aliases: obwoe_max_bins
> 
> ### ** Examples
> 
> obwoe_max_bins()
Maximum Bins (quantitative)
Range: [5, 20]
> obwoe_max_bins(range = c(4L, 12L))
Maximum Bins (quantitative)
Range: [4, 12]
> 
> 
> 
> 
> cleanEx()
> nameEx("obwoe_min_bins")
> ### * obwoe_min_bins
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: obwoe_min_bins
> ### Title: Minimum Bins Parameter
> ### Aliases: obwoe_min_bins
> 
> ### ** Examples
> 
> obwoe_min_bins()
Minimum Bins (quantitative)
Range: [2, 5]
> obwoe_min_bins(range = c(3L, 7L))
Minimum Bins (quantitative)
Range: [3, 7]
> 
> 
> 
> 
> cleanEx()
> nameEx("plot.obwoe")
> ### * plot.obwoe
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: plot.obwoe
> ### Title: Plot Method for obwoe Objects
> ### Aliases: plot.obwoe
> 
> ### ** Examples
> 
> 
> 
> 
> 
> cleanEx()
> nameEx("step_obwoe")
> ### * step_obwoe
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: step_obwoe
> ### Title: Optimal Binning and WoE Transformation Step
> ### Aliases: step_obwoe
> 
> ### ** Examples
> 
> 
> 
> 
> 
> cleanEx()
> nameEx("summary.obwoe")
> ### * summary.obwoe
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: summary.obwoe
> ### Title: Summary Method for obwoe Objects
> ### Aliases: summary.obwoe
> 
> ### ** Examples
> 
> 
> 
> 
> 
> ### * <FOOTER>
> ###
> cleanEx()
> options(digits = 7L)
> base::cat("Time elapsed: ", proc.time() - base::get("ptime", pos = 'CheckExEnv'),"\n")
Time elapsed:  180.517 1.519 183.284 0.01 0.061 
> grDevices::dev.off()
null device 
          1 
> ###
> ### Local variables: ***
> ### mode: outline-minor ***
> ### outline-regexp: "\\(> \\)?### [*]+" ***
> ### End: ***
> quit('no')
==437123== 
==437123== HEAP SUMMARY:
==437123==     in use at exit: 268,174,048 bytes in 54,524 blocks
==437123==   total heap usage: 290,747 allocs, 236,223 frees, 555,063,059 bytes allocated
==437123== 
==437123== 400 bytes in 1 blocks are possibly lost in loss record 253 of 3,682
==437123==    at 0x484B133: calloc (/builddir/build/BUILD/valgrind-3.24.0/coregrind/m_replacemalloc/vg_replace_malloc.c:1675)
==437123==    by 0x4011F63: UnknownInlinedFun (/usr/src/debug/glibc-2.39-38.fc40.x86_64/elf/../include/rtld-malloc.h:44)
==437123==    by 0x4011F63: allocate_dtv (/usr/src/debug/glibc-2.39-38.fc40.x86_64/elf/../elf/dl-tls.c:395)
==437123==    by 0x4012A61: _dl_allocate_tls (/usr/src/debug/glibc-2.39-38.fc40.x86_64/elf/../elf/dl-tls.c:673)
==437123==    by 0x557CC03: allocate_stack (/usr/src/debug/glibc-2.39-38.fc40.x86_64/nptl/allocatestack.c:431)
==437123==    by 0x557CC03: pthread_create@@GLIBC_2.34 (/usr/src/debug/glibc-2.39-38.fc40.x86_64/nptl/pthread_create.c:660)
==437123==    by 0x54B0076: gomp_team_start (/usr/src/debug/gcc-14.2.1-3.fc40.x86_64/obj-x86_64-redhat-linux/x86_64-redhat-linux/libgomp/../../../libgomp/team.c:859)
==437123==    by 0x54A60A0: GOMP_parallel (/usr/src/debug/gcc-14.2.1-3.fc40.x86_64/obj-x86_64-redhat-linux/x86_64-redhat-linux/libgomp/../../../libgomp/parallel.c:176)
==437123==    by 0x2467ED82: obcorr(Rcpp::DataFrame_Impl<Rcpp::PreserveStorage>, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, int) (packages/tests-vg/OptimalBinningWoE/src/OB_Correlation.cpp:471)
==437123==    by 0x246C06F1: _OptimalBinningWoE_obcorr (packages/tests-vg/OptimalBinningWoE/src/RcppExports.cpp:780)
==437123==    by 0x4A7E0D: R_doDotCall (svn/R-devel/src/main/dotcode.c:760)
==437123==    by 0x4A8363: do_dotcall (svn/R-devel/src/main/dotcode.c:1437)
==437123==    by 0x4E3FCC: bcEval_loop (svn/R-devel/src/main/eval.c:8132)
==437123==    by 0x4F1FD7: bcEval (svn/R-devel/src/main/eval.c:7515)
==437123==    by 0x4F1FD7: bcEval (svn/R-devel/src/main/eval.c:7500)
==437123== 
==437123== LEAK SUMMARY:
==437123==    definitely lost: 0 bytes in 0 blocks
==437123==    indirectly lost: 0 bytes in 0 blocks
==437123==      possibly lost: 400 bytes in 1 blocks
==437123==    still reachable: 268,173,648 bytes in 54,523 blocks
==437123==         suppressed: 0 bytes in 0 blocks
==437123== Reachable blocks (those to which a pointer was found) are not shown.
==437123== To see them, rerun with: --leak-check=full --show-leak-kinds=all
==437123== 
==437123== For lists of detected and suppressed errors, rerun with: -s
==437123== ERROR SUMMARY: 251 errors from 2 contexts (suppressed: 0 from 0)