R Under development (unstable) (2025-01-15 r87581) -- "Unsuffered Consequences"
Copyright (C) 2025 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

  Natural language support but running in an English locale

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> pkgname <- "FeatureHashing"
> source(file.path(R.home("share"), "R", "examples-header.R"))
> options(warn = 1)
> library('FeatureHashing')
> 
> base::assign(".oldSearch", base::search(), pos = 'CheckExEnv')
> base::assign(".old_wd", base::getwd(), pos = 'CheckExEnv')
> cleanEx()
> nameEx("CSCMatrix-class")
> ### * CSCMatrix-class
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: CSCMatrix-class
> ### Title: CSCMatrix
> ### Aliases: CSCMatrix-class dim<-,CSCMatrix-method dim,CSCMatrix-method
> ###   %*%,CSCMatrix,numeric-method %*%,numeric,CSCMatrix-method
> ###   [,CSCMatrix,missing,numeric,ANY-method
> ###   [,CSCMatrix,numeric,missing,ANY-method
> ###   [,CSCMatrix,numeric,numeric,ANY-method
> 
> ### ** Examples
> 
> # construct a CSCMatrix
> m <- hashed.model.matrix(~ ., CO2, 8)
> # convert it to dgCMatrix
> m2 <- as(m, "dgCMatrix")
> 
> 
> 
> cleanEx()
> nameEx("hash.mapping")
> ### * hash.mapping
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: hash.mapping
> ### Title: Extract mapping between hash and original values
> ### Aliases: hash.mapping
> 
> ### ** Examples
> 
> data(ipinyou)
> 
> m <- hashed.model.matrix(~., ipinyou.train, 2^10, create.mapping = TRUE)
> mapping <- hash.mapping(m)
> 
> 
> 
> 
> cleanEx()
> nameEx("hash.size")
> ### * hash.size
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: hash.size
> ### Title: Compute minimum hash size to reduce collision rate
> ### Aliases: hash.size
> 
> ### ** Examples
> 
> data(ipinyou)
> 
> # First try with a size of 2^10
> mat1 <- hashed.model.matrix(~., ipinyou.train, 2^10, create.mapping = TRUE)
> 
> # Extract mapping
> mapping1 <- hash.mapping(mat1)
> # Rate of collision
> mean(duplicated(mapping1))
[1] 0.8818916
> 
> # Second try, the size is computed
> size <- hash.size(ipinyou.train)
> mat2 <- hashed.model.matrix(~., ipinyou.train, size, create.mapping = TRUE)
> 
> # Extract mapping
> mapping2 <- hash.mapping(mat2)
> # Rate of collision
> mean(duplicated(mapping2))
[1] 0.2312572
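> 
> # A minimal back-of-the-envelope sketch (an approximation, not part of the
> # package): under a uniform hash, n distinct features thrown into m buckets
> # occupy about m * (1 - exp(-n / m)) buckets, so the expected share of
> # duplicated mapping entries is roughly 1 - m * (1 - exp(-n / m)) / n, e.g.
> #   n <- length(unique(names(mapping1)))
> #   1 - 2^10 * (1 - exp(-n / 2^10)) / n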
> 
> 
> 
> 
> cleanEx()
> nameEx("hashed.model.matrix")
> ### * hashed.model.matrix
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: hashed.model.matrix
> ### Title: Create a model matrix with feature hashing
> ### Aliases: hashed.model.matrix hashed.value hash.sign
> ###   hashed.interaction.value
> 
> ### ** Examples
> 
> # The following scripts show how to fit a logistic regression
> # after feature hashing
> ## Not run: 
> ##D data(ipinyou)
> ##D f <- ~ IP + Region + City + AdExchange + Domain +
> ##D   URL + AdSlotId + AdSlotWidth + AdSlotHeight +
> ##D   AdSlotVisibility + AdSlotFormat + CreativeID +
> ##D   Adid + split(UserTag, delim = ",")
> ##D # if the version of FeatureHashing is 0.8, please use the following command:
> ##D # m.train <- as(hashed.model.matrix(f, ipinyou.train, 2^16, transpose = FALSE), "dgCMatrix")
> ##D m.train <- hashed.model.matrix(f, ipinyou.train, 2^16)
> ##D m.test <- hashed.model.matrix(f, ipinyou.test, 2^16)
> ##D 
> ##D # logistic regression with glmnet
> ##D 
> ##D library(glmnet)
> ##D 
> ##D cv.g.lr <- cv.glmnet(m.train, ipinyou.train$IsClick,
> ##D   family = "binomial")#, type.measure = "auc")
> ##D p.lr <- predict(cv.g.lr, m.test, s = "lambda.min")
> ##D auc(ipinyou.test$IsClick, p.lr)
> ##D 
> ##D ## Per-Coordinate FTRL-Proximal with $L_1$ and $L_2$ Regularization for Logistic Regression
> ##D 
> ##D # The following scripts use an implementation of FTRL-Proximal for logistic regression,
> ##D # published in McMahan, Holt, Sculley, et al. (2013), to predict the probability
> ##D # (1-step prediction) and update the model simultaneously.
> ##D 
> ##D 
> ##D source(system.file("ftprl.R", package = "FeatureHashing"))
> ##D m.train <- hashed.model.matrix(f, ipinyou.train, 2^16, transpose = TRUE)
> ##D ftprl <- initialize.ftprl(0.1, 1, 0.1, 0.1, 2^16)
> ##D ftprl <- update.ftprl(ftprl, m.train, ipinyou.train$IsClick, predict = TRUE)
> ##D auc(ipinyou.train$IsClick, attr(ftprl, "predict"))
> ##D 
> ##D # If we use the same algorithm to predict the click-through rate of the 3rd season of iPinYou,
> ##D # the overall AUC will be 0.77, which is comparable to the overall AUC of 0.76 for the
> ##D # 3rd season reported in Zhang, Yuan, Wang, et al. (2014).
> ## End(Not run)
> 
> # The following scripts show the implementation details of FeatureHashing.
> 
> # Below, the original values are projected into a space of 2^6 dimensions
> m <- hashed.model.matrix(~ ., CO2, 2^6, create.mapping = TRUE,
+   transpose = TRUE, is.dgCMatrix = FALSE)
> 
> # Print the matrix via dgCMatrix
> as(m, "dgCMatrix")
64 x 84 sparse Matrix of class "dgCMatrix"
   [[ suppressing 84 column names ‘1’, ‘2’, ‘3’ ... ]]
   [[ full 64 x 84 sparse matrix print-out omitted ]]
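> 
> # A quick sanity check (a sketch, not in the original example): with
> # transpose = TRUE the hashed features are on the rows, so the matrix has
> # 2^6 = 64 rows and nrow(CO2) = 84 columns, which dim() confirms:
> #   dim(m)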
> 
> # Extraction of the dictionary: values with their hash
> mapping <- hash.mapping(m)
> 
> # To check the rate of collisions, we extract the indices of the hash
> # values through the modulo-division method, count how many duplicates
> # we have (ideally zero) and take the mean.
> mean(duplicated(mapping))
[1] 0.1111111
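> 
> # A minimal sketch (an assumption, not in the original example): the mapping
> # stores the 1-based bucket indices, so the same rate can be recomputed from
> # the raw hashes with the modulo-division method, e.g.
> #   mean(duplicated(hashed.value(names(mapping)) %% 2^6 + 1))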
> 
> # The type of the result produced by the function `hashed.model.matrix`
> # is a CSCMatrix. It supports simple subsetting
> # and matrix-vector multiplication
> rnorm(2^6) %*% m
 [1]  126.9814  234.0580  335.4646  471.0629  675.0783  912.4091 1353.9238
 [8]  127.2455  234.4007  335.2007  470.5406  674.4774  912.1564 1353.4015
[15]  127.5002  234.3745  335.3880  471.0537  674.7657  912.4223 1353.8134
[22]  127.3410  234.9231  336.1275  471.5124  675.5503  912.9934 1354.1935
[29]  128.5561  235.2283  336.2642  471.7052  675.5296  913.4221 1354.4425
[36]  127.3528  235.3844  335.3642  471.6928  674.9442  912.6345 1354.0031
[43]  127.2128  234.9410  336.0556  471.4966  675.1974  912.7978 1354.0204
[50]  127.1646  234.7355  335.6703  471.4034  675.1380  913.0529 1354.5789
[57]  127.9786  235.7630  336.9449  472.5769  676.3115  914.1253 1355.7299
[64]  129.5065  237.7066  339.2480  475.0261  678.7606  916.2262 1357.8308
[71]  128.1347  236.4134  338.2132  474.0026  677.8607  915.4948 1356.9870
[78]  127.8089  235.6719  337.5841  473.4521  677.2540  914.9106 1356.3691
> 
> # Detail of the hashing
> # To hash one specific value, we can use the `hashed.value` function
> # Below we apply this function to the feature names
> vectHash <- hashed.value(names(mapping))
> 
> # Now we check that the result is the same as the one obtained with
> # the more general `hashed.model.matrix` function.
> # We use the modulo-division method (that's the [%% 2^6] below)
> # to find the address in the hash table easily.
> stopifnot(all(vectHash %% 2^6 + 1 == mapping))
> 
> # The sign is corrected by `hash.sign`
> hash.sign(names(mapping))
 [1] -1  1  1 -1 -1  1 -1 -1  1  1  1  1 -1  1 -1  1  1  1
> 
> ## The interaction term is implemented as follows:
> m2 <- hashed.model.matrix(~ .^2, CO2, 2^6, create.mapping = TRUE,
+   transpose = TRUE, is.dgCMatrix = FALSE)
pmurhash.c:224:19: runtime error: left shift of 225 by 24 places cannot be represented in type 'int'
    #0 0x7f924892efdb in PMurHash32_Process /tmp/Rtmpaf6nF0/R.INSTALL4647c2dfdae57/digest/src/pmurhash.c:224
    #1 0x7f924892f4c9 in PMurHash32 /tmp/Rtmpaf6nF0/R.INSTALL4647c2dfdae57/digest/src/pmurhash.c:312
    #2 0x7f9243a90e99 in MurmurHash3LogHashFunction::operator()(char const*, int, bool) /data/gannet/ripley/R/packages/tests-gcc-SAN/FeatureHashing/src/hash_function.h:78
    #3 0x7f9243a7dd85 in InteractionConverter::get_hashed_feature(HashFunction*, unsigned int, unsigned int) /data/gannet/ripley/R/packages/tests-gcc-SAN/FeatureHashing/src/vector_converter.h:537
    #4 0x7f9243a7dd85 in InteractionConverter::get_feature(unsigned long) /data/gannet/ripley/R/packages/tests-gcc-SAN/FeatureHashing/src/vector_converter.h:497
    #5 0x7f9243ac9ed4 in SEXPREC* hashed_model_matrix >(Rcpp::RObject_Impl, Rcpp::DataFrame_Impl, unsigned long, bool, Rcpp::S4_Impl, bool, bool, bool) /data/gannet/ripley/R/packages/tests-gcc-SAN/FeatureHashing/src/hashed_model_matrix.cpp:226
    #6 0x7f9243a76cc1 in hashed_model_matrix_dataframe(Rcpp::RObject_Impl, Rcpp::DataFrame_Impl, unsigned long, bool, Rcpp::S4_Impl, bool, bool, bool) /data/gannet/ripley/R/packages/tests-gcc-SAN/FeatureHashing/src/hashed_model_matrix.cpp:318
    #7 0x7f9243a3306b in _FeatureHashing_hashed_model_matrix_dataframe /data/gannet/ripley/R/packages/tests-gcc-SAN/FeatureHashing/src/RcppExports.cpp:105
    #8 0x723715 in R_doDotCall /data/gannet/ripley/R/svn/R-devel/src/main/dotcode.c:780
    #9 0x73aff2 in do_dotcall /data/gannet/ripley/R/svn/R-devel/src/main/dotcode.c:1437
    #10 0x8a7243 in bcEval_loop /data/gannet/ripley/R/svn/R-devel/src/main/eval.c:8122
    #11 0x87840f in bcEval /data/gannet/ripley/R/svn/R-devel/src/main/eval.c:7505
    #12 0x83f152 in Rf_eval /data/gannet/ripley/R/svn/R-devel/src/main/eval.c:1167
    #13 0x84a122 in R_execClosure /data/gannet/ripley/R/svn/R-devel/src/main/eval.c:2393
    #14 0x83daca in applyClosure_core /data/gannet/ripley/R/svn/R-devel/src/main/eval.c:2306
    #15 0x83f7d6 in Rf_applyClosure /data/gannet/ripley/R/svn/R-devel/src/main/eval.c:2328
    #16 0x83f7d6 in Rf_eval /data/gannet/ripley/R/svn/R-devel/src/main/eval.c:1280
    #17 0x8632c6 in do_set /data/gannet/ripley/R/svn/R-devel/src/main/eval.c:3571
    #18 0x83fc06 in Rf_eval /data/gannet/ripley/R/svn/R-devel/src/main/eval.c:1232
    #19 0x9c9699 in Rf_ReplIteration /data/gannet/ripley/R/svn/R-devel/src/main/main.c:265
    #20 0x9c9699 in R_ReplConsole /data/gannet/ripley/R/svn/R-devel/src/main/main.c:317
    #21 0x9cab9b in run_Rmainloop /data/gannet/ripley/R/svn/R-devel/src/main/main.c:1219
    #22 0x9d5112 in Rf_mainloop /data/gannet/ripley/R/svn/R-devel/src/main/main.c:1226
    #23 0x4293ff in main /data/gannet/ripley/R/svn/R-devel/src/main/Rmain.c:29
    #24 0x7f9259e2950f in __libc_start_call_main (/lib64/libc.so.6+0x2950f) (BuildId: 8257ee907646e9b057197533d1e4ac8ede7a9c5c)
    #25 0x7f9259e295c8 in __libc_start_main_alias_2 (/lib64/libc.so.6+0x295c8) (BuildId: 8257ee907646e9b057197533d1e4ac8ede7a9c5c)
    #26 0x429de4 in _start (/data/gannet/ripley/R/gcc-SAN3/bin/exec/R+0x429de4) (BuildId: 5e148b08f50883e4fe61db372b4722d6e52a85b3)
> # The ^ operator indicates crossing to the specified degree.
> # For example (a+b+c)^2 is identical to (a+b+c)*(a+b+c)
> # which in turn expands to a formula containing the main effects
> # for a, b and c together with their second-order interactions.
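> 
> # A small illustration (a sketch, not part of the original example): base R
> # performs this crossing itself, so the expanded term labels can be
> # inspected with terms(), e.g.
> #   attr(terms(~ (a + b + c)^2), "term.labels")
> #   # "a"   "b"   "c"   "a:b" "a:c" "b:c"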
> 
> # Extract the mapping
> mapping2 <- hash.mapping(m2)
> 
> # Get the hash of the combination of two items, PlantQn2 and uptake
> mapping2["PlantQn2:uptake"]
PlantQn2:uptake 
             52 
> 
> # Extract hash of each item
> h1 <- hashed.value("PlantQn2")
> h2 <- hashed.value("uptake")
> 
> # Compute the hash of both items combined
> h3 <- hashed.value(rawToChar(c(intToRaw(h1), intToRaw(h2))))
> stopifnot(h3 %% 2^6 + 1 == mapping2["PlantQn2:uptake"])
> 
> # The concatenated feature, i.e. the array type in Hive
> data(test.tag)
> df <- data.frame(a = test.tag, b = rnorm(length(test.tag)))
> m <- hashed.model.matrix(~ split(a, delim = ",", type = "existence"):b, df, 2^6,
+   create.mapping = TRUE)
> # The column `a` is split by "," and interacts with `b`:
> mapping <- hash.mapping(m)
> names(mapping)
 [1] "a24:b"  "ant:b"  "atc:b"  "a3:b"   "a15:b"  "a25:b"  "ach:b"  "a4:b"  
 [9] "a16:b"  "a26:b"  "ahc"    "atc"    "a17:b"  "a27:b"  "akh:b"  "a6:b"  
[17] "a19:b"  "atw:b"  "a29:b"  "a10"    "atn"    "a11"    "a8:b"   "a12"   
[25] "atp"    "a1"     "ail:b"  "a9:b"   "a3"     "a15"    "aty:b"  "a4"    
[33] "a16"    "a17"    "a6"     "a19"    "atw"    "a8"     "ail"    "a9"    
[41] "aty"    "b"      "a20"    "a21"    "a23"    "a24"    "a25"    "a26"   
[49] "a27"    "atn:b"  "a10:b"  "a20:b"  "a29"    "antw:b" "a30:b"  "a11:b" 
[57] "a21:b"  "ahc:b"  "a12:b"  "atp:b"  "a30"    "antw"   "ant"    "a1:b"  
[65] "a23:b"  "ach"    "akh"   
> 
> 
> 
> ### *