Train a Naive Bayes classifier on training corpora of different sizes (100 to 1,000 documents) to see how classification accuracy changes with the amount of training data.
# Hold out a fixed test set: sample 1,000 document indices for training
# eligibility (l2); everything else becomes the test set.
l2 <- i %in% sample(i, 1000)
mt_test2 <- mt[!l2, ]

# For each training-set size n, repeat the train/predict cycle 20 times on
# random subsamples and record the accuracy on the fixed test set.
sizes <- seq(100, 1000, by = 100)
reps <- 20

# Preallocate a list of result rows and bind once at the end —
# growing a data.frame with rbind() inside a loop copies it every iteration.
results <- vector("list", length(sizes) * reps)
k <- 0
for (n in sizes) {
  for (m in seq_len(reps)) {
    k <- k + 1
    mt_train2 <- mt[sample(i[l2], n), ]
    nb <- textmodel_nb(mt_train2, docvars(mt_train2, "manual"))
    # predict() returns the predicted classes directly since quanteda v1.2.2
    # (until v1.2.0 it was predict(...)$nb.predicted).
    docvars(mt_test2, "nb") <- predict(nb, newdata = mt_test2)
    # Confusion matrix: manual coding (rows) vs NB prediction (columns).
    tb_temp <- table(docvars(mt_test2, "manual"), docvars(mt_test2, "nb"))
    temp <- as.data.frame(rbind(accuracy(tb_temp)))
    temp$size <- n
    results[[k]] <- temp
  }
}
data2 <- do.call(rbind, results)