Adding labels on curves in glmnet plot in R
As the labels are hard coded it is perhaps easier to write a quick function. This is just a quick shot, so can be changed to be more thorough. I would also note that when using the lasso there are normally a lot of variables so there will be a lot of overlap of the labels (as seen in your small example)
lbs_fun <- function(fit, ...) {
L <- length(fit$lambda)
x <- log(fit$lambda[L])
y <- fit$beta[, L]
labs <- names(y)
text(x, y, labels=labs, ...)
}
# plot
plot(fit, xvar="lambda")
# label
lbs_fun(fit)
how to plot the correct labels in glmnet?
As far as I understand it, what the plot is giving you is the value of the coefficients associated to the words that are significant. In your case, words 9-11, which are Kyoto, Japan and Tokyo (I can see that from the dtm
table). This normal plot library does not have I think what you say you would like to do. Instead, you can use library(plotmo)
as following:
library(dplyr)
library(tibble)
library(glmnet)
library(quanteda)
library(plotmo)
dtrain <- data_frame(text = c("Chinese Beijing Chinese",
"Chinese Chinese Shanghai",
"this is china",
"china is here",
'hello china',
"Chinese Beijing Chinese",
"Chinese Chinese Shanghai",
"this is china",
"china is here",
'hello china',
"Kyoto Japan",
"Tokyo Japan Chinese",
"Kyoto Japan",
"Tokyo Japan Chinese",
"Kyoto Japan",
"Tokyo Japan Chinese",
"Kyoto Japan",
"Tokyo Japan Chinese",
'japan'),
class = c(1, 1, 1, 1, 1,1,1,1,1,1,1,0,0,0,0,0,0,0,0))
dtm <- quanteda::dfm(dtrain$text)
fit <- glmnet(dtm, y = as.factor(dtrain$class), alpha = 1, family = 'binomial')
plot_glmnet(fit, label=3) # label the 3 biggest final coefs
Cheers !
plot.glmnet increase size of variable labels
As it looks as if the label size is hard coded (and global changes to cex
will change the other plot features) you can change plot.glmnet
# copy the plot function
myPlot <- plotCoef
# replace relevant part
body(myPlot)[[14]] <- quote(if (label) {
nnz = length(which)
xpos = max(index)
pos = 4
if (xvar == "lambda") {
xpos = min(index)
pos = 2
}
xpos = rep(xpos, nnz)
ypos = beta[, ncol(beta)]
text(xpos, ypos, paste(which), pos = pos, ...) # only changed this with ...
})
# copy first level of plot and replace plotCoef with myPlot
newplotter <- plot.glmnet
body(newplotter)[[3]] <- quote(myPlot(x$beta, lambda = x$lambda,
df = x$df, dev = x$dev.ratio,
label = label, xvar = xvar, ...))
This should increase the text (note the plot need to be wide enough)
newplotter(fits,label=TRUE,xvar="lambda", cex=1.2)
Adjusting top axis title and labels plots of plotmo::plot_glmnet in R
There is a line in the code of plot_glmnet, mtext(toplabel...) that does this..
Unfortunately if you want to remove that, you have to create a new function with this line removed, and assign the namespace:
new_plot_glmnet = function (x = stop("no 'x' argument"), xvar = c("rlambda", "lambda",
"norm", "dev"), label = 10, nresponse = NA, grid.col = NA,
s = NA, ...)
{
check.classname(x, "x", c("glmnet", "multnet"))
obj <- x
beta <- get.beta(obj$beta, nresponse)
ibeta <- nonzeroCoef(beta)
if (length(ibeta) == 0) {
plot(0:1, 0:1, col = 0)
legend("topleft", legend = "all glmnet coefficients are zero",
bty = "n")
return(invisible(NULL))
}
beta <- as.matrix(beta[ibeta, , drop = FALSE])
xlim <- dota("xlim", ...)
xvar <- match.arg1(xvar)
switch(xvar, norm = {
if (inherits(obj, "multnet") || inherits(obj, "mrelnet")) {
stop0("xvar=\"norm\" is not supported by plot_gbm for ",
"multiple responses (use plot.glmnet instead)")
}
x <- apply(abs(beta), 2, sum)
if (!is.specified(xlim)) xlim <- c(min(x), max(x))
xlab <- "L1 Norm"
approx.f <- 1
}, lambda = {
x <- log(obj$lambda)
if (!is.specified(xlim)) xlim <- c(min(x), max(x))
xlab <- "Log Lambda"
approx.f <- 0
}, rlambda = {
x <- log(obj$lambda)
if (!is.specified(xlim)) xlim <- c(max(x), min(x))
xlab <- "Log Lambda"
approx.f <- 0
}, dev = {
x <- obj$dev.ratio
if (!is.specified(xlim)) xlim <- c(min(x), max(x))
xlab <- "Fraction Deviance Explained"
approx.f <- 1
})
xlim <- fix.lim(xlim)
if (xvar != "rlambda")
stopifnot(xlim[1] < xlim[2])
else if (xlim[2] >= xlim[1])
stop0("xlim[1] must be bigger than xlim[2] for xvar=\"rlambda\"")
iname <- get.iname(beta, ibeta, label)
old.par <- par("mar", "mgp", "cex.axis", "cex.lab")
on.exit(par(mar = old.par$mar, mgp = old.par$mgp, cex.axis = old.par$cex.axis,
cex.lab = old.par$cex.lab))
mar4 <- old.par$mar[4]
if (length(iname)) {
cex.names <- min(1, max(0.5, 2.5/sqrt(length(iname))))
mar4 <- max(old.par$mar[4] + 1, 0.75 * cex.names * par("cex") *
max(nchar(names(iname))))
}
main <- dota("main", ...)
nlines.needed.for.main <- if (is.specified(main))
nlines(main) + 0.5
else 0
par(mar = c(old.par$mar[1], old.par$mar[2], max(old.par$mar[3],
nlines.needed.for.main + 2.6), mar4))
par(mgp = c(1.5, 0.4, 0))
par(cex.axis = 0.8)
ylab <- "Coefficients"
if (is.list(obj$beta))
ylab <- paste0(ylab, ": Response ", rownames(obj$dfmat)[nresponse])
coef.col <- get.coef.col(..., beta = beta)
keep <- which((coef.col != "NA") & (coef.col != "0"))
iname <- iname[iname %in% keep]
beta[-keep, ] <- NA
call.plot(graphics::matplot, force.x = x, force.y = t(beta),
force.main = "", force.col = coef.col, def.xlim = xlim,
def.xlab = xlab, def.ylab = ylab, def.lty = 1, def.lwd = 1,
def.type = "l", ...)
abline(h = 0, col = "gray", lty = 3)
maybe.grid(x = x, beta = beta, grid.col = grid.col, coef.col = coef.col,
...)
if (xvar == "rlambda") {
annotate.rlambda(lambda = obj$lambda, x = x, beta = beta,
s = s, grid.col = grid.col, coef.col = coef.col,
...)
toplab <- "Lambda"
}
else {
top.axis(obj, x, nresponse, approx.f)
toplab <- "Degrees of Freedom"
}
#mtext(toplab, side = 3, line = 1.5, cex = par("cex") * par("cex.lab"))
if (is.specified(main))
mtext(main, side = 3, line = 3, , cex = par("cex"))
if (length(iname))
right.labs(beta, iname, cex.names, coef.col)
invisible(NULL)
}
environment(new_plot_glmnet) <- asNamespace('plotmo')
Then you plot:
new_plot_glmnet(fit,xvar='lambda',label=7)
mtext('new top title', side=3,padj=-2)
Issue running glmnet() for mtcars dataset
As the commenter suggests you need to use the glmnet
method like so:
fit <- glmnet(as.matrix(mtcars[-1]), mtcars$mpg, alpha=1)
plot(fit)
Related Topics
Extracting Indices for Data Frame Rows That Have Max Value for Named Field
How to Get Name from a Value in an R Vector with Names
Force Facet_Wrap to Fill Bottom Row (And Leave Any "Gaps" in the Top Row)
How to Change the Default Directory in Rstudio (Or R)
How to Split a Data Frame Among Columns, Say at Every Nth Column
How to Specify the Size/Layout of a Single Plot to Match a Certain Grid in R
How to Modify Unexported Object in a Package
Shift a Column of Lists in Data.Table by Group
Is There a Table or Catalog of Aesthetics for Ggplot2
Linking Intel's Math Kernel Library (Mkl) to R on Windows
Manipulating Files with Non-English Names in R
Installing Package from a Local .Tar.Gz File on Linux
Extract Date from Given String in R
Generally Disable Dimension Dropping for Matrices
How to Expand a Large Dataframe in R
Smart Way to Chain Ifelse Statements
Predict X Values from Simple Fitting and Annoting It in the Plot