- using R version 4.5.1 (2025-06-13 ucrt)
- using platform: x86_64-w64-mingw32
- R was compiled by
gcc.exe (GCC) 14.2.0
GNU Fortran (GCC) 14.2.0
- running under: Windows Server 2022 x64 (build 20348)
- using session charset: UTF-8
- checking for file 'topicmodels.etm/DESCRIPTION' ... OK
- checking extension type ... Package
- this is package 'topicmodels.etm' version '0.1.0'
- package encoding: UTF-8
- checking package namespace information ... OK
- checking package dependencies ... OK
- checking if this is a source package ... OK
- checking if there is a namespace ... OK
- checking for hidden files and directories ... OK
- checking for portable file names ... OK
- checking whether package 'topicmodels.etm' can be installed ... OK
See the install log for details.
- checking installed package size ... OK
- checking package directory ... OK
- checking DESCRIPTION meta-information ... OK
- checking top-level files ... OK
- checking for left-over files ... OK
- checking index information ... OK
- checking package subdirectories ... OK
- checking code files for non-ASCII characters ... OK
- checking R files for syntax errors ... OK
- checking whether the package can be loaded ... [4s] OK
- checking whether the package can be loaded with stated dependencies ... [4s] OK
- checking whether the package can be unloaded cleanly ... [3s] OK
- checking whether the namespace can be loaded with stated dependencies ... [4s] OK
- checking whether the namespace can be unloaded cleanly ... [4s] OK
- checking loading without being on the library search path ... [4s] OK
- checking use of S3 registration ... OK
- checking dependencies in R code ... OK
- checking S3 generic/method consistency ... OK
- checking replacement functions ... OK
- checking foreign function calls ... OK
- checking R code for possible problems ... [9s] OK
- checking Rd files ... [1s] NOTE
checkRd: (-1) ETM.Rd:33: Lost braces in \itemize; \value handles \item{}{} directly
checkRd: (-1) ETM.Rd:34: Lost braces in \itemize; \value handles \item{}{} directly
checkRd: (-1) ETM.Rd:35: Lost braces in \itemize; \value handles \item{}{} directly
checkRd: (-1) ETM.Rd:36: Lost braces in \itemize; \value handles \item{}{} directly
checkRd: (-1) ETM.Rd:37: Lost braces in \itemize; \value handles \item{}{} directly
- checking Rd metadata ... OK
- checking Rd cross-references ... OK
- checking for missing documentation entries ... OK
- checking for code/documentation mismatches ... OK
- checking Rd \usage sections ... OK
- checking Rd contents ... OK
- checking for unstated dependencies in examples ... OK
- checking contents of 'data' directory ... OK
- checking data for non-ASCII characters ... [1s] OK
- checking data for ASCII and uncompressed saves ... OK
- checking examples ... [5s] ERROR
Running examples in 'topicmodels.etm-Ex.R' failed
The error most likely occurred in:
> ### Name: ETM
> ### Title: Topic Modelling in Semantic Embedding Spaces
> ### Aliases: ETM
>
> ### ** Examples
>
> library(torch)
> library(topicmodels.etm)
> library(word2vec)
> library(udpipe)
> data(brussels_reviews_anno, package = "udpipe")
> ##
> ## Toy example with pretrained embeddings
> ##
>
> ## a. build word2vec model
> x <- subset(brussels_reviews_anno, language %in% "nl")
> x <- paste.data.frame(x, term = "lemma", group = "doc_id")
> set.seed(4321)
> w2v <- word2vec(x = x$lemma, dim = 15, iter = 20, type = "cbow", min_count = 5)
> embeddings <- as.matrix(w2v)
>
> ## b. build document term matrix on nouns + adjectives, align with the embedding terms
> dtm <- subset(brussels_reviews_anno, language %in% "nl" & upos %in% c("NOUN", "ADJ"))
> dtm <- document_term_frequencies(dtm, document = "doc_id", term = "lemma")
> dtm <- document_term_matrix(dtm)
> dtm <- dtm_conform(dtm, columns = rownames(embeddings))
> dtm <- dtm[dtm_rowsums(dtm) > 0, ]
>
> ## create and fit an embedding topic model - 8 topics, theta 100-dimensional
> if (torch::torch_is_installed()) {
+
+ set.seed(4321)
+ torch_manual_seed(4321)
+ model <- ETM(k = 8, dim = 100, embeddings = embeddings, dropout = 0.5)
+ optimizer <- optim_adam(params = model$parameters, lr = 0.005, weight_decay = 0.0000012)
+ overview <- model$fit(data = dtm, optimizer = optimizer, epoch = 40, batch_size = 1000)
+ scores <- predict(model, dtm, type = "topics")
+
+ lastbatch <- subset(overview$loss, overview$loss$batch_is_last == TRUE)
+ plot(lastbatch$epoch, lastbatch$loss)
+ plot(overview$loss_test)
+
+ ## show top words in each topic
+ terminology <- predict(model, type = "terms", top_n = 7)
+ terminology
+
+ ##
+ ## Toy example without pretrained word embeddings
+ ##
+ set.seed(4321)
+ torch_manual_seed(4321)
+ model <- ETM(k = 8, dim = 100, embeddings = 15, dropout = 0.5, vocab = colnames(dtm))
+ optimizer <- optim_adam(params = model$parameters, lr = 0.005, weight_decay = 0.0000012)
+ overview <- model$fit(data = dtm, optimizer = optimizer, epoch = 40, batch_size = 1000)
+ terminology <- predict(model, type = "terms", top_n = 7)
+ terminology
+
+
+
+ ## Don't show:
+ ##
+ ## Another example using fit_original
+ ##
+ data(ng20, package = "topicmodels.etm")
+ vocab <- ng20$vocab
+ tokens <- ng20$bow_tr$tokens
+ counts <- ng20$bow_tr$counts
+
+ torch_manual_seed(123456789)
+ model <- ETM(k = 4, vocab = vocab, dim = 5, embeddings = 25)
+ model
+ optimizer <- optim_adam(params = model$parameters, lr = 0.005, weight_decay = 0.0000012)
+
+ traindata <- list(tokens = tokens, counts = counts, vocab = vocab)
+ test1 <- list(tokens = ng20$bow_ts_h1$tokens, counts = ng20$bow_ts_h1$counts, vocab = vocab)
+ test2 <- list(tokens = ng20$bow_ts_h2$tokens, counts = ng20$bow_ts_h2$counts, vocab = vocab)
+
+ out <- model$fit_original(data = traindata, test1 = test1, test2 = test2, epoch = 4,
+ optimizer = optimizer, batch_size = 1000,
+ lr_anneal_factor = 4, lr_anneal_nonmono = 10)
+ test <- subset(out$loss, out$loss$batch_is_last == TRUE)
+ plot(test$epoch, test$loss)
+
+ topic.centers <- as.matrix(model, type = "embedding", which = "topics")
+ word.embeddings <- as.matrix(model, type = "embedding", which = "words")
+ topic.terminology <- as.matrix(model, type = "beta")
+
+ terminology <- predict(model, type = "terms", top_n = 4)
+ terminology
+ ## End(Don't show)
+
+ }
- checking for unstated dependencies in 'tests' ... OK
- checking tests ... [1s] OK
Running 'tinytest.R' [0s]
- checking PDF version of manual ... [20s] OK
- checking HTML version of manual ... [1s] OK
- DONE
Status: 1 ERROR, 1 NOTE