This is the replication script for ‘archivist: An R Package for Managing, Recording and Restoring Data Analysis Results’ (Przemyslaw Biecek, Marcin Kosinski) submitted to JSS.
First, make sure that archivist is installed.
# Install archivist from CRAN only when it is not already available, then
# attach it exactly once. requireNamespace() is used for the availability
# check because, unlike require(), it does not attach the package as a side
# effect of the test.
if (!requireNamespace("archivist", quietly = TRUE)) {
  install.packages("archivist")
}
library(archivist)
Reading artifacts from GitHub
# Read one artifact straight from GitHub; the address ends with the
# artifact's md5 hash.
archivist::aread(
  md5hash = "pbiecek/Eseje/arepo/ba7f58fafe7373420e3ddce039558140"
)
Reading artifacts from package
library("archivist")
# Collect every artifact tagged "class:lm" from the pbiecek/graphGallery
# repository.
models <- asearch("pbiecek/graphGallery", patterns = "class:lm")
# vapply() pins the result to a named numeric vector. With sapply() an empty
# search result would come back as list(), which sort() rejects
# ("'x' must be atomic"); vapply() yields numeric(0) instead.
modelsBIC <- vapply(models, BIC, numeric(1))
sort(modelsBIC)
## 990861c7c27812ee959f10e5f76fe2c3 2a6e492cb6982f230e48cf46023e2e4f
## 39.05577 67.52735
## 0a82efeb8250a47718cea9d7f64e5ae7 378237103bb60c58600fe69bed6c7f11
## 189.73593 189.73593
## 7f11e03539d48d35f7e7fe7780527ba7 c1b1ef7bcddefb181f79176015bc3931
## 189.73593 189.73593
## 0e213ac68a45b6cd454d06b91f991bc7 e58d2f9d50b67ce4d397bf015ec1259c
## 243.49450 243.49450
## 18a98048f0584469483afb65294ce3ed
## 396.16690
Reading artifacts from Shiny
# Wake up the Shiny container hosted at shinyapps by requesting the app page
# once and discarding the result (not needed for other Shiny apps).
invisible(
  xml2::read_html("https://cogito.shinyapps.io/archivistShiny/")
)
# Read the artifact through the app's URL.
archivist::aread(
  md5hash = "https://cogito.shinyapps.io/archivistShiny/arepo/ca680b829abd8f0a4bd2347dcf9fe534"
)
Creation of a new empty repository
# Directory that will hold the local repository.
repo <- "arepo"
# Create the repository there and make it the default one.
createLocalRepo(default = TRUE, repoDir = repo)
Deletion of an existing repository
# Directory of the local repository to delete.
repo <- "arepo"
deleteLocalRepo(repo)
Copying artifacts from other repositories
# Create the local repository and make it the default one ...
repo <- "arepo"
createLocalRepo(repo, default = TRUE)
# ... then copy a single artifact into it from pbiecek/graphGallery on GitHub.
copyRemoteRepo(
  repoTo    = repo,
  md5hashes = "7f3453331910e3f321ef97d87adb5bad",
  user      = "pbiecek",
  repo      = "graphGallery",
  repoType  = "github"
)
Showing repository statistics
# List the tags stored in the repository: one row per artifact/tag pair
# together with its creation date.
showLocalRepo(method = "tags", repoDir = repo)
## artifact
## 1 7f3453331910e3f321ef97d87adb5bad
## 2 7f3453331910e3f321ef97d87adb5bad
## 3 7f3453331910e3f321ef97d87adb5bad
## 4 7f3453331910e3f321ef97d87adb5bad
## 5 7f3453331910e3f321ef97d87adb5bad
## 6 7f3453331910e3f321ef97d87adb5bad
## 7 7f3453331910e3f321ef97d87adb5bad
## 8 7f3453331910e3f321ef97d87adb5bad
## 9 7f3453331910e3f321ef97d87adb5bad
## tag createdDate
## 1 format:rda 2016-12-31 15:50:59
## 2 name:pl1 2016-12-31 15:50:59
## 3 class:gg 2016-12-31 15:50:59
## 4 class:ggplot 2016-12-31 15:50:59
## 5 labelx:Sepal.Length 2016-12-31 15:50:59
## 6 labely:Petal.Length 2016-12-31 15:50:59
## 7 date:2016-12-31 15:50:59 2016-12-31 15:50:59
## 8 session_info:0c325724f6118fdd80e6504204b72cfa 2016-12-31 15:50:59
## 9 format:png 2016-12-31 15:51:00
# Summarise the graphGallery repository that ships inside the archivist
# package.
summaryLocalRepo(
  repoDir = system.file("graphGallery", package = "archivist")
)
## Number of archived artifacts in Repository: 7
## Number of archived datasets in Repository: 3
## Number of various classes archived in Repository:
## Number
## lm 3
## data.frame 2
## summary.lm 1
## gg 2
## ggplot 2
## Saves per day in Repository:
## Saves
## 2016-02-07 5
## 2016-02-08 13
## 2016-03-04 3
## 2016-12-31 4
Setting a default repository
# Default remote repository: pbiecek/graphGallery on GitHub.
setRemoteRepo(repoType = "github", user = "pbiecek", repo = "graphGallery")
# Default local repository: the graphGallery directory shipped inside the
# archivist package.
setLocalRepo(system.file("graphGallery", package = "archivist"))
Saving to the default local repository
# Make the repository named by `repo` the default local one; the
# saveToLocalRepo() call below is then made without a repoDir argument.
setLocalRepo(repo)
data("iris")
saveToLocalRepo(iris)
## [1] "ff575c261c949d073b2895b05d1097c3"
aoptions("repoType", value = "github")
## [1] "github"
library(ggplot2)
repo <- "arepo"
# The plot call and the variable name `pl` are kept exactly as they are: the
# recorded output below shows the archived name (name:pl) and the hash.
pl <- qplot(Sepal.Length, Petal.Length, data = iris)
# The returned hash carries a "data" attribute holding the hash of the iris
# data frame saved above.
saveToRepo(pl, repoDir = repo)
## [1] "b725eae07eba5c170489435e3466b760"
## attr(,"data")
## [1] "ff575c261c949d073b2895b05d1097c3"
showLocalRepo(repoDir = repo, method = "tags")
## artifact
## 1 7f3453331910e3f321ef97d87adb5bad
## 2 7f3453331910e3f321ef97d87adb5bad
## 3 7f3453331910e3f321ef97d87adb5bad
## 4 7f3453331910e3f321ef97d87adb5bad
## 5 7f3453331910e3f321ef97d87adb5bad
## 6 7f3453331910e3f321ef97d87adb5bad
## 7 7f3453331910e3f321ef97d87adb5bad
## 8 7f3453331910e3f321ef97d87adb5bad
## 9 7f3453331910e3f321ef97d87adb5bad
## 10 ff575c261c949d073b2895b05d1097c3
## 11 ff575c261c949d073b2895b05d1097c3
## 12 ff575c261c949d073b2895b05d1097c3
## 13 ff575c261c949d073b2895b05d1097c3
## 14 ff575c261c949d073b2895b05d1097c3
## 15 ff575c261c949d073b2895b05d1097c3
## 16 ff575c261c949d073b2895b05d1097c3
## 17 ff575c261c949d073b2895b05d1097c3
## 18 ff575c261c949d073b2895b05d1097c3
## 19 73c0af8a919ed6f073abd3ccb6a2090b
## 20 ff575c261c949d073b2895b05d1097c3
## 21 ff575c261c949d073b2895b05d1097c3
## 22 b725eae07eba5c170489435e3466b760
## 23 b725eae07eba5c170489435e3466b760
## 24 b725eae07eba5c170489435e3466b760
## 25 b725eae07eba5c170489435e3466b760
## 26 b725eae07eba5c170489435e3466b760
## 27 b725eae07eba5c170489435e3466b760
## 28 b725eae07eba5c170489435e3466b760
## 29 6bc0b4ff7194b1580cbaf6da54e749c7
## 30 b725eae07eba5c170489435e3466b760
## 31 ff575c261c949d073b2895b05d1097c3
## 32 ff575c261c949d073b2895b05d1097c3
## 33 ff575c261c949d073b2895b05d1097c3
## 34 b725eae07eba5c170489435e3466b760
## tag createdDate
## 1 format:rda 2016-12-31 15:50:59
## 2 name:pl1 2016-12-31 15:50:59
## 3 class:gg 2016-12-31 15:50:59
## 4 class:ggplot 2016-12-31 15:50:59
## 5 labelx:Sepal.Length 2016-12-31 15:50:59
## 6 labely:Petal.Length 2016-12-31 15:50:59
## 7 date:2016-12-31 15:50:59 2016-12-31 15:50:59
## 8 session_info:0c325724f6118fdd80e6504204b72cfa 2016-12-31 15:50:59
## 9 format:png 2016-12-31 15:51:00
## 10 format:rda 2016-12-31 15:57:55
## 11 name:iris 2016-12-31 15:57:55
## 12 class:data.frame 2016-12-31 15:57:55
## 13 varname:Sepal.Length 2016-12-31 15:57:55
## 14 varname:Sepal.Width 2016-12-31 15:57:55
## 15 varname:Petal.Length 2016-12-31 15:57:55
## 16 varname:Petal.Width 2016-12-31 15:57:55
## 17 varname:Species 2016-12-31 15:57:55
## 18 date:2016-12-31 15:57:55 2016-12-31 15:57:55
## 19 format:rda 2016-12-31 15:57:55
## 20 session_info:73c0af8a919ed6f073abd3ccb6a2090b 2016-12-31 15:57:55
## 21 format:txt 2016-12-31 15:57:55
## 22 format:rda 2016-12-31 15:57:55
## 23 name:pl 2016-12-31 15:57:55
## 24 class:gg 2016-12-31 15:57:55
## 25 class:ggplot 2016-12-31 15:57:55
## 26 labelx:Sepal.Length 2016-12-31 15:57:55
## 27 labely:Petal.Length 2016-12-31 15:57:55
## 28 date:2016-12-31 15:57:55 2016-12-31 15:57:55
## 29 format:rda 2016-12-31 15:57:55
## 30 session_info:6bc0b4ff7194b1580cbaf6da54e749c7 2016-12-31 15:57:55
## 31 format:rda 2016-12-31 15:57:55
## 32 format:txt 2016-12-31 15:57:55
## 33 relationWith:b725eae07eba5c170489435e3466b760 2016-12-31 15:57:55
## 34 format:png 2016-12-31 15:57:55
Session info for this object
asession(md5hash = "11127cc6ce69a89d11d0e30865a33c13")
## [1] NA
library(archivist)
createLocalRepo(repoDir = "arepo", default = TRUE)
library(dplyr)
# Chain the analysis steps with %a% so that ahistory() can list each step
# together with the hash of its result.
tmp <- iris %a%
  filter(Sepal.Length < 6) %a%
  lm(Petal.Length ~ Species, data = .) %a%
  summary()
# History looked up from the object itself ...
ahistory(tmp)
## iris [ff575c261c949d073b2895b05d1097c3]
## -> filter(Sepal.Length < 6) [d3696e13d15223c7d0bbccb33cc20a11]
## -> lm(Petal.Length ~ Species, data = .) [990861c7c27812ee959f10e5f76fe2c3]
## -> summary() [050e41ec3bc40b3004bc6bdd356acae7]
# ... and looked up by the hash of the final result.
ahistory(md5hash = "050e41ec3bc40b3004bc6bdd356acae7")
## iris [ff575c261c949d073b2895b05d1097c3]
## -> filter(Sepal.Length < 6) [d3696e13d15223c7d0bbccb33cc20a11]
## -> lm(Petal.Length ~ Species, data = .) [990861c7c27812ee959f10e5f76fe2c3]
## -> summary() [050e41ec3bc40b3004bc6bdd356acae7]
Remote, local or in a package
# Load the same artifact from the remote GitHub repository ...
loadFromRemoteRepo(
  md5hash = "7f3453331910e3f321ef97d87adb5bad",
  repo = "graphGallery",
  user = "pbiecek",
  value = TRUE
)
# ... from the copy shipped inside the package, using an abbreviated hash ...
loadFromLocalRepo(
  md5hash = "7f3453",
  repoDir = system.file("graphGallery", package = "archivist"),
  value = TRUE
)
# ... or with aread() and a user/repo/hash address.
archivist::aread(
  md5hash = "pbiecek/graphGallery/7f3453331910e3f321ef97d87adb5bad"
)
library(archivist)
# With a default local repository set, aread() is called with the hash alone.
setLocalRepo(repoDir = system.file("graphGallery", package = "archivist"))
# loadFromLocalRepo("7f3453", value=TRUE)
archivist::aread(md5hash = "7f3453")
setLocalRepo(repoDir = system.file("graphGallery", package = "archivist"))
# Restore an archived linear model and print its summary.
model <- aread(md5hash = "2a6e492cb6982f230e48cf46023e2e4f")
summary(model)
##
## Call:
## lm(formula = Petal.Length ~ Sepal.Length + Species, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.76390 -0.17875 0.00716 0.17461 0.79954
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.70234 0.23013 -7.397 1.01e-11 ***
## Sepal.Length 0.63211 0.04527 13.962 < 2e-16 ***
## Speciesversicolor 2.21014 0.07047 31.362 < 2e-16 ***
## Speciesvirginica 3.09000 0.09123 33.870 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2826 on 146 degrees of freedom
## Multiple R-squared: 0.9749, Adjusted R-squared: 0.9744
## F-statistic: 1890 on 3 and 146 DF, p-value: < 2.2e-16
# The digest printed here equals the hash the model was read by.
digest::digest(object = model)
## [1] "2a6e492cb6982f230e48cf46023e2e4f"
# Remove one artifact, identified by its hash, from the repository in `repo`.
rmFromLocalRepo(
  md5hash = "7f3453331910e3f321ef97d87adb5bad",
  repoDir = repo
)
Remove all artifacts older than 30 days
# Find artifacts dated from 2010-01-01 up to 30 days before today; the outer
# parentheses print the result as well as storing it.
(obj2rm <- searchInLocalRepo(
  pattern = list(dateFrom = "2010-01-01", dateTo = Sys.Date() - 30),
  repoDir = repo
))
## character(0)
rmFromLocalRepo(md5hash = obj2rm, repoDir = repo, many = TRUE)
# Search the packaged graphGallery repository by a single tag.
searchInLocalRepo(
  pattern = "class:gg",
  repoDir = system.file("graphGallery", package = "archivist")
)
## [1] "7f3453331910e3f321ef97d87adb5bad" "369227e67f9164dcbe934dadf2b53cc2"
# Search by a date range.
searchInLocalRepo(
  pattern = list(dateFrom = "2016-01-01", dateTo = "2016-02-07"),
  repoDir = system.file("graphGallery", package = "archivist")
)
## [1] "d9313a0de3e2980201a8971e3384ff26" "ff575c261c949d073b2895b05d1097c3"
## [3] "2a6e492cb6982f230e48cf46023e2e4f" "93ecfdf1436932e2860c6dbdf2abc2ad"
## [5] "afb2550d0f886f0cf3b050f04c5cd4f8"
# Search by several tags at once.
searchInLocalRepo(
  pattern = c("class:gg", "labelx:Sepal.Length"),
  repoDir = system.file("graphGallery", package = "archivist")
)
## [1] "369227e67f9164dcbe934dadf2b53cc2" "7f3453331910e3f321ef97d87adb5bad"
# Path of the graphGallery repository shipped inside the archivist package.
arepo <- system.file("graphGallery", package = "archivist")
#shinySearchInLocalRepo(arepo)
library(archivist)
# Separate repository, made the default, for the models fitted while lm() is
# traced.
createLocalRepo(repoDir = "allModels", default = TRUE)
atrace("lm", "z")
## [1] "lm"
# in the article is only one call to lm()
lm(Sepal.Length ~ Sepal.Width, data = iris)
## Tracing lm(Sepal.Length ~ Sepal.Width, data = iris) on exit
##
## Call:
## lm(formula = Sepal.Length ~ Sepal.Width, data = iris)
##
## Coefficients:
## (Intercept) Sepal.Width
## 6.5262 -0.2234
# Second model fit.
lm(Sepal.Length ~ Petal.Length, data = iris)
## Tracing lm(Sepal.Length ~ Petal.Length, data = iris) on exit
##
## Call:
## lm(formula = Sepal.Length ~ Petal.Length, data = iris)
##
## Coefficients:
## (Intercept) Petal.Length
## 4.3066 0.4089
# The same model is fitted once more.
lm(Sepal.Length ~ Petal.Length, data = iris)
## Tracing lm(Sepal.Length ~ Petal.Length, data = iris) on exit
##
## Call:
## lm(formula = Sepal.Length ~ Petal.Length, data = iris)
##
## Coefficients:
## (Intercept) Petal.Length
## 4.3066 0.4089
# BIC of every artifact tagged "class:lm" that asearch() finds.
# vapply() guarantees a named numeric vector; sapply() would silently return
# list() if the search came back empty.
vapply(asearch("class:lm"), BIC, numeric(1))
## 42fcf77af2c40f70c445cbba513aeabd 5c5751e36b31b2251d2767d96993320a
## 381.0236 169.0723
# Clean up the repository created for this example.
deleteLocalRepo("allModels")
Requires a knitr report to work
# addHooksToPrint(class=c("ggplot", "data.frame"),
# repoDir = "arepo",
# repo = "Eseje", user = "pbiecek", subdir = "arepo")
Requires a GitHub repository to work
# createMDGallery("arepo/readme.md",
# repo="Eseje", user = "pbiecek", subdir = "arepo",
# addMiniature = TRUE, addTags = TRUE)
# Print the session info recorded for an artifact in a remote repository.
asession(
  md5hash = "pbiecek/graphGallery/arepo/600bda83cb840947976bd1ce3a11879d"
)
## setting value
## version R version 3.2.2 (2015-08-14)
## system x86_64, darwin13.4.0
## ui RStudio (0.99.441)
## language (EN)
## collate en_US.UTF-8
## tz Europe/Warsaw
## date 2016-02-09
##
## package * version date source
## acepack 1.3-3.3 2013-05-03 CRAN (R 3.1.0)
## archivist * 1.9.7.2 2016-02-08 CRAN (R 3.2.2)
## assertthat 0.1 2013-12-06 CRAN (R 3.1.0)
## bitops 1.0-6 2013-08-17 CRAN (R 3.1.0)
## car 2.1-1 2015-12-14 CRAN (R 3.2.3)
## cluster 2.0.3 2015-07-21 CRAN (R 3.2.2)
## colorspace 1.2-6 2015-03-11 CRAN (R 3.1.3)
## DBI 0.3.1 2014-09-24 CRAN (R 3.1.1)
## devtools 1.9.1 2015-09-11 CRAN (R 3.2.0)
## digest 0.6.9 2016-01-08 CRAN (R 3.2.3)
## dplyr * 0.4.3 2015-09-01 CRAN (R 3.2.0)
## foreign 0.8-65 2015-07-02 CRAN (R 3.2.2)
## Formula 1.2-1 2015-04-07 CRAN (R 3.1.3)
## ggplot2 2.0.0 2015-12-16 Github (hadley/ggplot2@11679cd)
## gridExtra * 2.0.0 2015-07-14 CRAN (R 3.2.0)
## gtable 0.1.2 2012-12-05 CRAN (R 3.1.0)
## Hmisc 3.17-0 2015-09-21 CRAN (R 3.2.0)
## httr 1.0.0 2015-06-25 CRAN (R 3.2.0)
## intsvy 1.8 2015-11-30 CRAN (R 3.2.2)
## labeling 0.3 2014-08-23 CRAN (R 3.1.1)
## lattice 0.20-33 2015-07-14 CRAN (R 3.2.2)
## latticeExtra 0.6-26 2013-08-15 CRAN (R 3.1.0)
## lazyeval 0.1.10 2015-01-02 CRAN (R 3.1.2)
## lme4 1.1-10 2015-10-06 CRAN (R 3.2.2)
## lubridate 1.5.0 2015-12-03 CRAN (R 3.2.3)
## magrittr 1.5 2014-11-22 CRAN (R 3.1.2)
## MASS 7.3-43 2015-07-16 CRAN (R 3.2.2)
## Matrix 1.2-2 2015-07-08 CRAN (R 3.2.2)
## MatrixModels 0.4-1 2015-08-22 CRAN (R 3.2.0)
## memisc 0.97 2015-03-08 CRAN (R 3.1.3)
## memoise 0.2.1 2014-04-22 CRAN (R 3.1.0)
## mgcv 1.8-7 2015-07-23 CRAN (R 3.2.2)
## minqa 1.2.4 2014-10-09 CRAN (R 3.1.1)
## munsell 0.4.2 2013-07-11 CRAN (R 3.1.0)
## nlme 3.1-121 2015-06-29 CRAN (R 3.2.2)
## nloptr 1.0.4 2014-08-04 CRAN (R 3.1.1)
## nnet 7.3-10 2015-06-29 CRAN (R 3.2.2)
## pbkrtest 0.4-4 2015-12-12 CRAN (R 3.2.3)
## plyr 1.8.3 2015-06-12 CRAN (R 3.2.0)
## proto 0.3-10 2012-12-22 CRAN (R 3.1.0)
## quantreg 5.19 2015-08-31 CRAN (R 3.2.0)
## R6 2.1.2 2016-01-26 CRAN (R 3.2.3)
## RColorBrewer 1.1-2 2014-12-07 CRAN (R 3.1.2)
## Rcpp 0.12.3 2016-01-10 CRAN (R 3.2.3)
## RCurl 1.95-4.7 2015-06-30 CRAN (R 3.2.0)
## reshape 0.8.5 2014-04-23 CRAN (R 3.1.0)
## rpart 4.1-10 2015-06-29 CRAN (R 3.2.2)
## RSQLite 1.0.0 2014-10-25 CRAN (R 3.1.2)
## scales 0.3.0 2015-08-25 CRAN (R 3.2.0)
## SparseM 1.7 2015-08-15 CRAN (R 3.2.0)
## stringi 1.0-1 2015-10-22 CRAN (R 3.2.0)
## stringr 1.0.0 2015-04-30 CRAN (R 3.2.0)
## survival 2.38-3 2015-07-02 CRAN (R 3.2.2)
# Be warned, this line will install a lot of packages in old versions
# restoreLibs("pbiecek/graphGallery/arepo/600bda83cb840947976bd1ce3a11879d")
# aread("pbiecek/graphGallery/arepo/600bda83cb840947976bd1ce3a11879d")
# Print the R version, platform and package versions of the current session.
sessionInfo()
## R version 3.3.2 (2016-10-31)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Sierra 10.12.1
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] gridExtra_2.2.1 dplyr_0.5.0 ggplot2_2.2.0.9000
## [4] archivist_2.1.2
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.8 plyr_1.8.4 bitops_1.0-6 tools_3.3.2
## [5] digest_0.6.10 lubridate_1.6.0 jsonlite_1.1 RSQLite_1.1-1
## [9] evaluate_0.10 memoise_1.0.0 tibble_1.2 gtable_0.2.0
## [13] rstudioapi_0.6 shiny_0.14.2 DBI_0.5-1 curl_2.2
## [17] yaml_2.1.14 withr_1.0.2 httr_1.2.1 stringr_1.1.0
## [21] knitr_1.15 xml2_1.0.0 devtools_1.12.0 rprojroot_1.1
## [25] grid_3.3.2 R6_2.2.0 rmarkdown_1.2 magrittr_1.5
## [29] backports_1.0.4 scales_0.4.1 htmltools_0.3.5 assertthat_0.1
## [33] mime_0.5 colorspace_1.3-1 xtable_1.8-2 httpuv_1.3.3
## [37] labeling_0.3 stringi_1.1.2 RCurl_1.95-4.8 lazyeval_0.2.0
## [41] munsell_0.4.3