- using R Under development (unstable) (2025-09-10 r88809)
- using platform: x86_64-pc-linux-gnu
- R was compiled by
Debian clang version 19.1.7 (3+b2)
Debian flang-new version 19.1.7 (3+b2)
- running under: Debian GNU/Linux forky/sid
- using session charset: UTF-8
- checking for file ‘fixest/DESCRIPTION’ ... OK
- checking extension type ... Package
- this is package ‘fixest’ version ‘0.13.2’
- package encoding: UTF-8
- checking CRAN incoming feasibility ... [4s/5s] OK
- checking package namespace information ... OK
- checking package dependencies ... OK
- checking if this is a source package ... OK
- checking if there is a namespace ... OK
- checking for executable files ... OK
- checking for hidden files and directories ... OK
- checking for portable file names ... OK
- checking for sufficient/correct file permissions ... OK
- checking whether package ‘fixest’ can be installed ... OK
See the install log for details.
- used C++ compiler: ‘Debian clang version 19.1.7 (3+b2)’
- checking package directory ... OK
- checking for future file timestamps ... OK
- checking ‘build’ directory ... OK
- checking DESCRIPTION meta-information ... OK
- checking top-level files ... OK
- checking for left-over files ... OK
- checking index information ... OK
- checking package subdirectories ... OK
- checking code files for non-ASCII characters ... OK
- checking R files for syntax errors ... OK
- checking whether the package can be loaded ... [3s/3s] OK
- checking whether the package can be loaded with stated dependencies ... [2s/4s] OK
- checking whether the package can be unloaded cleanly ... [3s/4s] OK
- checking whether the namespace can be loaded with stated dependencies ... [2s/3s] OK
- checking whether the namespace can be unloaded cleanly ... [2s/3s] OK
- checking loading without being on the library search path ... [1s/1s] OK
- checking whether startup messages can be suppressed ... [2s/3s] OK
- checking use of S3 registration ... OK
- checking dependencies in R code ... OK
- checking S3 generic/method consistency ... OK
- checking replacement functions ... OK
- checking foreign function calls ... OK
- checking R code for possible problems ... [110s/139s] OK
- checking Rd files ... [2s/3s] OK
- checking Rd metadata ... OK
- checking Rd line widths ... OK
- checking Rd cross-references ... OK
- checking for missing documentation entries ... OK
- checking for code/documentation mismatches ... OK
- checking Rd \usage sections ... OK
- checking Rd contents ... OK
- checking for unstated dependencies in examples ... OK
- checking contents of ‘data’ directory ... OK
- checking data for non-ASCII characters ... [0s/0s] OK
- checking LazyData ... OK
- checking data for ASCII and uncompressed saves ... OK
- checking line endings in C/C++/Fortran sources/headers ... OK
- checking line endings in Makefiles ... OK
- checking compilation flags in Makevars ... OK
- checking for GNU extensions in Makefiles ... OK
- checking for portable use of $(BLAS_LIBS) and $(LAPACK_LIBS) ... OK
- checking use of PKG_*FLAGS in Makefiles ... OK
- checking use of SHLIB_OPENMP_*FLAGS in Makefiles ... OK
- checking pragmas in C/C++ headers and code ... OK
- checking compilation flags used ... OK
- checking compiled code ... OK
- checking installed files from ‘inst/doc’ ... OK
- checking files in ‘vignettes’ ... OK
- checking examples ... [24s/37s] OK
- checking for unstated dependencies in ‘demo’ ... OK
- checking for unstated dependencies in ‘tests’ ... OK
- checking tests ... [30s/43s] ERROR
Running ‘fixest_tests.R’ [29s/43s]
Running the tests in ‘tests/fixest_tests.R’ failed.
Complete output:
> #----------------------------------------------#
> # Author: Laurent Berge
> # Date creation: Fri Jul 10 09:03:06 2020
> # ~: package sniff tests
> #----------------------------------------------#
>
> # Not everything is currently covered, but I'll improve it over time
>
> # Some functions are not trivial to test properly though
>
> library(fixest)
> library(sandwich)
>
> test = fixest:::test ; chunk = fixest:::chunk
> vcovClust = fixest:::vcovClust
> stvec = stringmagic::string_vec_alias()
>
> setFixest_notes(FALSE)
>
> if(fixest:::is_r_check()){
+ if(requireNamespace("data.table", quietly = TRUE)){
+ library(data.table)
+ data.table::setDTthreads(1)
+ }
+ setFixest_nthreads(1)
+ }
>
> ####
> #### ESTIMATIONS ####
> ####
>
> ####
> #### ... Main ####
> ####
>
>
> chunk("ESTIMATION")
ESTIMATION
>
> set.seed(0)
>
> base = iris
> names(base) = c("y", "x1", "x2", "x3", "species")
> base$fe_2 = rep(1:5, 30)
> base$fe_3 = sample(15, 150, TRUE)
> base$constant = 5
> base$y_int = as.integer(base$y)
> base$w = as.vector(unclass(base$species) - 0.95)
> base$offset_value = unclass(base$species) - 0.95
> base$y_01 = 1 * ((scale(base$x1) + rnorm(150)) > 0)
> # what follows to avoid removal of fixed-effects (logit is pain in the neck)
> base$y_01[1:5 + rep(c(0, 50, 100), each = 5)] = 1
> base$y_01[6:10 + rep(c(0, 50, 100), each = 5)] = 0
> # We enforce the removal of observations
> base$y_int_null = base$y_int
> base$y_int_null[base$fe_3 %in% 1:5] = 0
>
> for(model in c("ols", "pois", "logit", "negbin", "Gamma")){
+ cat("Model: ", format(model, width = 6), sep = "")
+ for(use_weights in c(FALSE, TRUE)){
+ my_weight = NULL
+ if(use_weights) my_weight = base$w
+
+ for(use_offset in c(FALSE, TRUE)){
+ my_offset = NULL
+ if(use_offset) my_offset = base$offset_value
+
+ for(id_fe in 0:9){
+
+ cat(".")
+
+ tol = switch(model, "negbin" = 1e-2, "logit" = 3e-5, 1e-5)
+
+ # Setting up the formula to accommodate FEs
+ if(id_fe == 0){
+ fml_fixest = fml_stats = y ~ x1
+ } else if(id_fe == 1){
+ fml_fixest = y ~ x1 | species
+ fml_stats = y ~ x1 + factor(species)
+ } else if(id_fe == 2){
+ fml_fixest = y ~ x1 | species + fe_2
+ fml_stats = y ~ x1 + factor(species) + factor(fe_2)
+ } else if(id_fe == 3){
+ # varying slope
+ fml_fixest = y ~ x1 | species[[x2]]
+ fml_stats = y ~ x1 + x2:species
+ } else if(id_fe == 4){
+ # varying slope -- 1 VS, 1 FE
+ fml_fixest = y ~ x1 | species[[x2]] + fe_2
+ fml_stats = y ~ x1 + x2:species + factor(fe_2)
+ } else if(id_fe == 5){
+ # varying slope -- 2 VS
+ fml_fixest = y ~ x1 | species[x2]
+ fml_stats = y ~ x1 + x2:species + species
+ } else if(id_fe == 6){
+ # varying slope -- 2 VS bis
+ fml_fixest = y ~ x1 | species[[x2]] + fe_2[[x3]]
+ fml_stats = y ~ x1 + x2:species + x3:factor(fe_2)
+ } else if(id_fe == 7){
+ # Combined clusters
+ fml_fixest = y ~ x1 + x2 | species^fe_2
+ fml_stats = y ~ x1 + x2 + paste(species, fe_2)
+ } else if(id_fe == 8){
+ fml_fixest = y ~ x1 | species[x2] + fe_2[x3] + fe_3
+ fml_stats = y ~ x1 + species + i(species, x2) + factor(fe_2) + i(fe_2, x3) + factor(fe_3)
+ } else if(id_fe == 9){
+ fml_fixest = y ~ x1 | species + fe_2[x2,x3] + fe_3
+ fml_stats = y ~ x1 + species + factor(fe_2) + i(fe_2, x2) + i(fe_2, x3) + factor(fe_3)
+ }
+
+ # ad hoc modifications of the formula
+ if(model == "logit"){
+ fml_fixest = xpd(y_01 ~ ..rhs, ..rhs = fml_fixest[[3]])
+ fml_stats = xpd(y_01 ~ ..rhs, ..rhs = fml_stats[[3]])
+
+ # The estimations are OK, conv differences out of my control
+ if(id_fe %in% 8:9) tol = 0.5
+
+ } else if(model == "pois"){
+ fml_fixest = xpd(y_int_null ~ ..rhs, ..rhs = fml_fixest[[3]])
+ fml_stats = xpd(y_int_null ~ ..rhs, ..rhs = fml_stats[[3]])
+
+ } else if(model %in% c("negbin", "Gamma")){
+ fml_fixest = xpd(y_int ~ ..rhs, ..rhs = fml_fixest[[3]])
+ fml_stats = xpd(y_int ~ ..rhs, ..rhs = fml_stats[[3]])
+ }
+
+ adj = 1
+ if(model == "ols"){
+ res = feols(fml_fixest, base, weights = my_weight, offset = my_offset)
+ res_bis = lm(fml_stats, base, weights = my_weight, offset = my_offset)
+
+ } else if(model %in% c("pois", "logit", "Gamma")){
+ adj = 0
+ if(model == "Gamma" && use_offset) next
+
+ my_family = switch(model, pois = poisson(), logit = binomial(), Gamma = Gamma())
+
+ res = feglm(fml_fixest, base, family = my_family, weights = my_weight, offset = my_offset)
+
+ if(!is.null(res$obs_selection$obsRemoved)){
+ qui = res$obs_selection$obsRemoved
+
+ # I MUST do that.... => subset does not work...
+ base_tmp = base[qui, ]
+ base_tmp$my_offset = my_offset[qui]
+ base_tmp$my_weight = my_weight[qui]
+ res_bis = glm(fml_stats, base_tmp, family = my_family, weights = my_weight, offset = my_offset)
+ } else {
+ res_bis = glm(fml_stats, data = base, family = my_family, weights = my_weight, offset = my_offset)
+ }
+
+ } else if(model == "negbin"){
+ # no offset in glm.nb + no VS in fenegbin + no weights in fenegbin
+ if(use_weights || use_offset || id_fe > 2) next
+
+ res = fenegbin(fml_fixest, base, notes = FALSE)
+ res_bis = MASS::glm.nb(fml_stats, base)
+
+ }
+
+ test(coef(res)["x1"], coef(res_bis)["x1"], "~", tol)
+ test(se(res, se = "st", ssc = ssc(K.adj = adj))["x1"], se(res_bis)["x1"], "~", tol)
+ test(pvalue(res, se = "st", ssc = ssc(K.adj = adj))["x1"], pvalue(res_bis)["x1"], "~", tol*10**(model == "negbin"))
+ # cat("Model: ", model, ", FE: ", id_fe, ", weight: ", use_weights, ", offset: ", use_offset, "\n", sep="")
+
+ }
+ cat("|")
+ }
+ }
+ cat("\n")
+ }
Model: ols ..........|..........|..........|..........|
Model: pois ..........|..........|..........|..........|
Model: logit ..........|..........|..........|..........|
Model: negbin..........|..........|..........|..........|
Model: Gamma ..........|..........|..........|..........|
There were 36 warnings (use warnings() to see them)
>
> ####
> #### ... Corner cases ####
> ####
>
> chunk("Corner cases")
CORNER CASES
>
>
> # We test the absence of bugs
>
> base = iris
> names(base) = c("y", "x1", "x2", "x3", "fe1")
> base$fe2 = rep(1:5, 30)
> base$y[1:5] = NA
> base$x1[4:8] = NA
> base$x2[4:21] = NA
> base$x3[110:111] = NA
> base$fe1[110:118] = NA
> base$fe2[base$fe2 == 1] = 0
> base$fe3 = sample(letters[1:5], 150, TRUE)
> base$period = rep(1:50, 3)
> base$x_cst = 1
>
> res = feols(y ~ 1 | csw(fe1, fe1^fe2), base)
>
> res = feols(y ~ 1 + csw(x1, i(fe1)) | fe2, base)
>
> res = feols(y ~ csw(f(x1, 1:2), x2) | sw0(fe2, fe2^fe3), base, panel.id = ~ fe1 + period)
>
> res = feols(d(y) ~ -1 + d(x2), base, panel.id = ~ fe1 + period)
> test(length(coef(res)), 1)
>
> res = feols(c(y, x1) ~ 1 | fe1 | x2 ~ x3, base)
>
> res = feols(y ~ x1 | fe1[x2] + fe2[x2], base)
>
> #
> # singletons
> #
>
> # all variables removed bc of singleton
> base$singletons = 1:nrow(base)
> test(feols(y ~ x1 | singletons, base, fixef.rm = "singleton"), "err")
>
> # in multiple estimations => no error
> res = feols(y ~ x1 | sw0(singletons), base, fixef.rm = "singleton")
> res_bis = feols(y ~ x1, base)
> test(coef(res[[1]]), coef(res_bis))
> test(is.null(coef(res[[2]])), TRUE)
>
> #
> # NA models (ie all variables are collinear with the FEs)
> #
>
> # Should work when warn = FALSE or multiple est
> for(i in 1:2){
+ fun = switch(i, "1" = feols, "2" = feglm)
+
+ res = feols(y ~ x_cst | fe1, base, warn = FALSE)
+ res # => no error
+ etable(res) # => no error
+
+ # error when warn = TRUE
+ test(feols(y ~ x_cst | fe1, base), "err")
+
+ # multiple est => no error
+ res = feols(c(y, x1) ~ x_cst | fe1, base)
+ res # => no error
+ etable(res) # => no error
+ }
>
>
> # Removing the intercept!!!
>
> res = feols(y ~ -1 + x1 + i(fe1), base)
> test("(Intercept)" %in% names(res$coefficients), FALSE)
>
> res = feols(y ~ -1 + x1 + factor(fe1), base)
> test("(Intercept)" %in% names(res$coefficients), FALSE)
>
> res = feols(y ~ -1 + x1 + i(fe1) + i(fe2), base)
> test("(Intercept)" %in% names(res$coefficients), FALSE)
> test(is.null(res$collin.var), TRUE)
>
>
> # IV + interacted FEs
> res = feols(y ~ x1 | fe1^fe2 | x2 ~ x3, base)
>
> # IVs no exo var
> res = feols(y ~ 0 | x2 ~ x3, base)
> # Same in stepwise
> res = feols(y ~ 0 | sw0(fe1) | x2 ~ x3, base)
>
> # IVs + lags
> res = feols(y ~ x1 | fe1^fe2 | l(x2, -1:1) ~ l(x3, -1:1), base, panel.id = ~ fe1 + period)
>
> # functions in interactions
> res = feols(y ~ x1 | factor(fe1)^factor(fe2), base)
> res = feols(y ~ x1 | round(x2^2), base)
> test(feols(y ~ x1 | factor(fe1^fe2), base), "err")
>
> res = feols(y ~ x1 | bin(x2, "bin::1")^fe1 + fe1^fe2, base)
>
> # 1 obs (after FE removal) estimation
> base_1obs = data.frame(y = c(1, 0), fe = c(1, 2), x = c(1, 0))
> test(fepois(y ~ x | fe, base_1obs, fixef.rm = "infinite"), "err")
> # no error
> res = fepois(y ~ 1 | fe, base_1obs, fixef.rm = "infinite")
>
> # warning when demeaning algo reaches max iterations
> data(trade)
> test(feols(Euros ~ log(dist_km) | Destination + Origin + Product,
+ trade, fixef.iter = 1), "warn")
>
>
> ####
> #### ... depvar removal ####
> ####
>
> chunk("depvar removal")
DEPVAR REMOVAL
>
> # we remove the depvar when it is in the RHS
> base = setNames(iris, c("y", "x1", "x2", "x3", "species"))
>
> fml_equiv = c(
+ "y" = "1",
+ "y + x1" = "x1",
+ "x1 + y + x2 + I(x1)" = "x1 + x2 + I(x1)"
+ )
>
> all_funs = list(feols, feglm, femlm)
>
> id_fml = 3
> id_fun = 2
>
> fun = all_funs[[id_fun]]
>
> for(id_fun in seq_along(all_funs)){
+ fun = all_funs[[id_fun]]
+ cat("|")
+ for(id_fml in seq_along(fml_equiv)){
+
+ if(id_fun == 3 && id_fml == 3) next
+
+ cat(".")
+ rhs_0 = names(fml_equiv)[id_fml]
+ rhs_1 = fml_equiv[id_fml]
+
+ # simple estimation
+ est_0 = fun(y ~ .[rhs_0], base)
+ est_1 = fun(y ~ .[rhs_1], base)
+ test(coef(est_0), coef(est_1))
+ test(se(est_0), se(est_1))
+
+ # multiple samples
+ est_0 = fun(y ~ .[rhs_0], base, split = "species")
+ est_1 = fun(y ~ .[rhs_1], base, split = "species")
+ test(coef(est_0[[1]]), coef(est_1[[1]]))
+ test(se(est_0[[1]]), se(est_1[[1]]))
+
+ # multiple lhs
+ est_0 = fun(c(y, x3) ~ .[rhs_0] + x3, base)
+ est_1a = fun(y ~ .[rhs_1] + x3, base)
+ est_1b = fun(x3 ~ .[rhs_0], base)
+ test(coef(est_0[[1]]), coef(est_1a))
+ test(coef(est_0[[2]]), coef(est_1b))
+ test(se(est_0[[1]]), se(est_1a))
+ test(se(est_0[[2]]), se(est_1b))
+
+ # multiple rhs + fixef
+ est_0 = fun(y ~ csw(.[rhs_0], x3) | sw0(species), base)
+ est_1 = fun(y ~ csw(.[rhs_1], x3) | sw0(species), base)
+ for(id_mult in n_models(est_0)){
+ test(coef(est_0[[id_mult]]), coef(est_1[[id_mult]]))
+ test(se(est_0[[id_mult]]), se(est_1[[id_mult]]))
+ }
+
+ # in IVs
+ if(id_fun == 1){
+ base$inst = rnorm(nrow(base))
+ base$endo = rnorm(nrow(base))
+ est_0 = feols(y ~ csw(.[rhs_0], x3) | sw0(species) | endo ~ inst, base)
+ est_1 = feols(y ~ csw(.[rhs_1], x3) | sw0(species) | endo ~ inst, base)
+ for(id_mult in n_models(est_0)){
+ test(coef(est_0[[id_mult]]), coef(est_1[[id_mult]]))
+ }
+ }
+ }
+ }
|...|...|..> cat("\n")
>
>
> # model.matrix
> est = feols(y ~ x1 + y, base)
> test(ncol(model.matrix(est)), 2)
>
> ####
> #### ... obs removal ####
> ####
>
> base_rm = base_did
> base_rm$y = abs(base_rm$y)
> # we add 0-values
> is_only_0 = base_rm$id <= 10
> base_rm$y[is_only_0] = 0
> # we add singletons
> is_single = base_rm$id %in% 50:55
> base_rm$period[is_single] = runif(sum(is_single))
>
> est_none = fepois(y ~ x1 | id + period, base_rm, fixef.rm = "none")
> est_single = fepois(y ~ x1 | id + period, base_rm, fixef.rm = "singletons")
> est_inf = fepois(y ~ x1 | id + period, base_rm, fixef.rm = "infinite")
> est_perfect = fepois(y ~ x1 | id + period, base_rm, fixef.rm = "perfect")
>
> n = nrow(base_rm)
> test(nobs(est_none), n)
> test(nobs(est_single), n - sum(is_single))
> test(nobs(est_inf), n - sum(is_only_0))
> test(nobs(est_perfect), n - sum(is_single) - sum(is_only_0))
>
> ####
> #### ... Fit methods ####
> ####
>
> chunk("Fit methods")
FIT METHODS
>
> base = setNames(iris, c("y", "x1", "x2", "x3", "species"))
> base$y_int = as.integer(base$y)
> base$y_log = sample(c(TRUE, FALSE), 150, TRUE)
>
> res = feglm.fit(base$y, base[, 2:4])
> res_bis = feglm(y ~ -1 + x1 + x2 + x3, base)
> test(coef(res), coef(res_bis))
>
> res = feglm.fit(base$y_int, base[, 2:4])
> res_bis = feglm(y_int ~ -1 + x1 + x2 + x3, base)
> test(coef(res), coef(res_bis))
>
> res = feglm.fit(base$y_log, base[, 2:4])
> res_bis = feglm(y_log ~ -1 + x1 + x2 + x3, base)
> test(coef(res), coef(res_bis))
>
>
>
> res = feglm.fit(base$y, base[, 2:4], family = "poisson")
> res_bis = feglm(y ~ -1 + x1 + x2 + x3, base, family = "poisson")
> test(coef(res), coef(res_bis))
>
> res = feglm.fit(base$y_int, base[, 2:4], family = "poisson")
> res_bis = feglm(y_int ~ -1 + x1 + x2 + x3, base, family = "poisson")
> test(coef(res), coef(res_bis))
>
> res = feglm.fit(base$y_log, base[, 2:4], family = "poisson")
> res_bis = feglm(y_log ~ -1 + x1 + x2 + x3, base, family = "poisson")
> test(coef(res), coef(res_bis))
>
> ####
> #### global variables ####
> ####
>
> chunk("globals")
GLOBALS
>
> est_reg = function(df, yvar, xvar, refgrp) {
+ feols(.[yvar] ~ i(.[xvar], ref = refgrp), data = df)
+ }
>
> est = est_reg(iris, "Sepal.Length", "Species", refgrp = "setosa")
>
> # checking when it should not work
> base = setNames(iris, c("y", "x1", "x2", "x3", "species"))
>
> z = base$x1
> test(feols(y ~ z, base), "err")
>
>
>
> ####
> #### ... Collinearity ####
> ####
>
> chunk("COLLINEARITY")
COLLINEARITY
>
> base = iris
> names(base) = c("y", "x1", "x2", "x3", "species")
> base$constant = 5
> base$y_int = as.integer(base$y)
> base$w = as.vector(unclass(base$species) - 0.95)
>
> for(useWeights in c(FALSE, TRUE)){
+ for(model in c("ols", "pois")){
+ for(use_fe in c(FALSE, TRUE)){
+ cat(".")
+
+ my_weight = NULL
+ if(useWeights) my_weight = base$w
+
+ adj = 1
+ if(model == "ols"){
+ if(!use_fe){
+ res = feols(y ~ x1 + constant, base, weights = my_weight)
+ res_bis = lm(y ~ x1 + constant, base, weights = my_weight)
+ } else {
+ res = feols(y ~ x1 + constant | species, base, weights = my_weight)
+ res_bis = lm(y ~ x1 + constant + species, base, weights = my_weight)
+ }
+ } else {
+ if(!use_fe){
+ res = fepois(y_int ~ x1 + constant, base, weights = my_weight)
+ res_bis = glm(y_int ~ x1 + constant, base, weights = my_weight, family = poisson)
+ } else {
+ res = fepois(y_int ~ x1 + constant | species, base, weights = my_weight)
+ res_bis = glm(y_int ~ x1 + constant + species, base, weights = my_weight, family = poisson)
+ }
+ adj = 0
+ }
+
+ test(coef(res)["x1"], coef(res_bis)["x1"], "~")
+ test(se(res, se = "st", ssc = ssc(K.adj=adj))["x1"], se(res_bis)["x1"], "~")
+ # cat("Weight: ", useWeights, ", model: ", model, ", FE: ", use_fe, "\n", sep="")
+
+ }
+ }
+ }
........> cat("\n")
>
>
> ####
> #### ... Non linear tests ####
> ####
>
> chunk("NON LINEAR")
NON LINEAR
>
> base = iris
> names(base) = c("y", "x1", "x2", "x3", "species")
>
> tab = c("versicolor" = 5, "setosa" = 0, "virginica" = -5)
>
> fun_nl = function(a, b, spec){
+ res = as.numeric(tab[spec])
+ a*res + b*res^2
+ }
>
> est_nl = feNmlm(y ~ x1, base, NL.fml = ~fun_nl(a, b, species), NL.start = 1, family = "gaussian")
>
> base$var_spec = as.numeric(tab[base$species])
>
> est_lin = feols(y ~ x1 + var_spec + I(var_spec^2), base)
>
> test(coef(est_nl), coef(est_lin)[c(3, 4, 1, 2)], "~")
>
> ####
> #### ... Lagging ####
> ####
>
> # Different types of lag
> # 1) check no error in wide variety of situations
> # 2) check consistency
>
> chunk("LAGGING")
LAGGING
>
> data(base_did)
> base = base_did
>
> n = nrow(base)
>
> set.seed(0)
> base$y_na = base$y ; base$y_na[sample(n, 50)] = NA
> base$period_txt = letters[base$period]
> ten_dates = c("1960-01-15", "1960-01-16", "1960-03-31", "1960-04-05", "1960-05-12", "1960-05-25", "1960-06-20", "1960-07-30", "1965-01-02", "2002-12-05")
> base$period_date = as.Date(ten_dates, "%Y-%m-%d")[base$period]
> base$y_0 = base$y**2 ; base$y_0[base$id == 1] = 0
>
> # We compute the lags "by hand"
> base = base[order(base$id, base$period), ]
> base$x1_lag = c(NA, base$x1[-n]) ; base$x1_lag[base$period == 1] = NA
> base$x1_lead = c(base$x1[-1], NA) ; base$x1_lead[base$period == 10] = NA
> base$x1_diff = base$x1 - base$x1_lag
>
> # we create holes
> base$period_bis = base$period ; base$period_bis[base$period_bis == 5] = 50
> base$x1_lag_hole = base$x1_lag ; base$x1_lag_hole[base$period %in% c(5, 6)] = NA
> base$x1_lead_hole = base$x1_lead ; base$x1_lead_hole[base$period %in% c(4, 5)] = NA
>
> # we reshuffle the base
> base = base[sample(n), ]
>
> #
> # Checks consistency
> #
>
> cat("consistentcy...")
consistentcy...>
> test(lag(x1 ~ id + period, data = base), base$x1_lag)
> test(lag(x1 ~ id + period, -1, data = base), base$x1_lead)
>
> test(lag(x1 ~ id + period_bis, data = base), base$x1_lag_hole)
> test(lag(x1 ~ id + period_bis, -1, data = base), base$x1_lead_hole)
>
> test(lag(x1 ~ id + period_txt, data = base), base$x1_lag)
> test(lag(x1 ~ id + period_txt, -1, data = base), base$x1_lead)
>
> test(lag(x1 ~ id + period_date, data = base), base$x1_lag)
> test(lag(x1 ~ id + period_date, -1, data = base), base$x1_lead)
>
> cat("done.\nEstimations...")
done.
Estimations...>
> #
> # Estimations
> #
>
> # Poisson
>
> for(depvar in c("y", "y_na", "y_0")){
+ for(p in c("period", "period_txt", "period_date")){
+
+ base$per = base[[p]]
+
+ cat(".")
+
+ base$y_dep = base[[depvar]]
+ pdat = panel(base, ~ id + period)
+
+ if(depvar == "y_0"){
+ estfun = fepois
+ } else {
+ estfun = feols
+ }
+
+ est_raw = estfun(y_dep ~ x1 + x1_lag + x1_lead, base)
+ est = estfun(y_dep ~ x1 + l(x1) + f(x1), base, panel.id = "id,per")
+ est_pdat = estfun(y_dep ~ x1 + l(x1, 1) + f(x1, 1), pdat)
+ test(coef(est_raw), coef(est))
+ test(coef(est_raw), coef(est_pdat))
+
+ # Now diff
+ est_raw = estfun(y_dep ~ x1 + x1_diff, base)
+ est = estfun(y_dep ~ x1 + d(x1), base, panel.id = "id,per")
+ est_pdat = estfun(y_dep ~ x1 + d(x1, 1), pdat)
+ test(coef(est_raw), coef(est))
+ test(coef(est_raw), coef(est_pdat))
+
+ # Now we just check that calls to l/f works without checking coefs
+
+ est = estfun(y_dep ~ x1 + l(x1) + f(x1), base, panel.id = "id,per")
+ est = estfun(y_dep ~ l(x1, -1:1) + f(x1, 2), base, panel.id = c("id", "per"))
+ est = estfun(y_dep ~ l(x1, -1:1, fill = 1), base, panel.id = ~ id + per)
+ if(depvar == "y") test(est$nobs, n)
+ est = estfun(f(y_dep) ~ f(x1, -1:1), base, panel.id = ~ id + per)
+ }
+ }
.........>
> cat("done.\n\n")
done.
>
> #
> # Data table
> #
>
> cat("data.table...")
data.table...> # We just check there is no bug (consistency should be OK)
>
> suppressPackageStartupMessages(library(data.table))
>
> base_dt = data.table(id = c("A", "A", "B", "B"),
+ time = c(1, 2, 1, 3),
+ x = c(5, 6, 7, 8))
>
> base_dt = panel(base_dt, ~id + time)
>
> base_dt[, x_l := l(x)]
> test(base_dt$x_l, c(NA, 5, NA, NA))
>
> lag_creator = function(dt) {
+ dt2 = panel(dt, ~id + time)
+ dt2[, x_l := l(x)]
+ return(dt2)
+ }
>
> base_bis = lag_creator(base_dt)
>
> base_bis[, x_d := d(x)]
>
> cat("done.\n\n")
done.
>
> #
> # Panel
> #
>
> # We ensure we get the right SEs whether we use the panel() or the panel.id method
> data(base_did)
>
> # Setting a data set as a panel...
> pdat = panel(base_did, ~id+period)
> pdat$fe = sample(15, nrow(pdat), replace = TRUE)
>
> base_panel = unpanel(pdat)
>
> est_pdat = feols(y ~ x1 | fe, pdat)
> est_panel = feols(y ~ x1 | fe, base_panel, panel.id = ~id+period)
>
> test(attr(vcov(est_pdat, attr = TRUE), "type"),
+ attr(vcov(est_panel, attr = TRUE), "type"))
>
> #
> # testing irregularities
> #
>
> base_panel = data.frame(
+ id = rep(letters[1:3], each = 4),
+ time = c(1, 2, 3, 3,
+ 1, 2, 4, 6,
+ 2, 4, 6, 8)
+ )
> base_panel$x = rnorm(nrow(base_panel))
> base_panel$y = base_panel$x * 0.5 + rnorm(nrow(base_panel))
>
> est_panel_irr = feols(y ~ l(x), base_panel, panel.id = "id,time",
+ panel.duplicate.method = "first")
>
> x_lag = model.matrix(est_panel_irr)[, 2]
> test(x_lag, base_panel$x[c(1, 2, 2, 5)])
>
> est_panel_irr_cons = feols(y ~ l(x), base_panel, panel.id = "id,time",
+ panel.time.step = "cons",
+ panel.duplicate.method = "first")
>
> x_lag = model.matrix(est_panel_irr_cons)[, 2]
> test(x_lag, base_panel$x[c(NULL, 1, 2, 2,
+ NULL, 5, NULL, 7,
+ NULL, NULL, 10, 11)])
>
> est_panel_irr_with = feols(y ~ l(x), base_panel, panel.id = "id,time",
+ panel.time.step = "within",
+ panel.duplicate.method = "first")
>
> x_lag = model.matrix(est_panel_irr_with)[, 2]
> test(x_lag, base_panel$x[c(NULL, 1, 2, 3,
+ NULL, 5, 6, 7,
+ NULL, 9, 10, 11)])
>
> ####
> #### ... subset ####
> ####
>
> chunk("SUBSET")
SUBSET
>
> set.seed(5)
> base = iris
> names(base) = c("y", "x1", "x2", "x3", "species")
> base$fe_bis = sample(letters, 150, TRUE)
> base$x4 = rnorm(150)
> base$x1[sample(150, 5)] = NA
>
> fml = y ~ x1 + x2
>
> # Errors
> test(feols(fml, base, subset = ~species), "err")
> test(feols(fml, base, subset = -1:15), "err")
> test(feols(fml, base, subset = integer(0)), "err")
> test(feols(fml, base, subset = c(TRUE, TRUE, FALSE)), "err")
>
> # Valid use
> for(id_fun in 1:6){
+ estfun = switch(as.character(id_fun),
+ "1" = feols,
+ "2" = feglm,
+ "3" = fepois,
+ "4" = femlm,
+ "5" = fenegbin,
+ "6" = feNmlm)
+
+ for(id_fe in 1:5){
+
+ cat(".")
+
+ fml = switch(as.character(id_fe),
+ "1" = y ~ x1 + x2,
+ "2" = y ~ x1 + x2 | species,
+ "3" = y ~ x1 + x2 | fe_bis,
+ "4" = y ~ x1 + x2 + i(fe_bis),
+ "5" = y ~ x1 + x2 | fe_bis[x3])
+
+ if(id_fe == 5 && id_fun %in% 4:6) next
+
+ if(id_fun == 6){
+ res_sub_a = estfun(fml, base, subset = ~species == "setosa", NL.fml = ~ a*x4, NL.start = 0)
+ res_sub_b = estfun(fml, base, subset = base$species == "setosa", NL.fml = ~ a*x4, NL.start = 0)
+ res_sub_c = estfun(fml, base, subset = which(base$species == "setosa"), NL.fml = ~ a*x4, NL.start = 0)
+ res = estfun(fml, base[base$species == "setosa", ], NL.fml = ~ a*x4, NL.start = 0)
+ } else {
+ res_sub_a = estfun(fml, base, subset = ~species == "setosa")
+ res_sub_b = estfun(fml, base, subset = base$species == "setosa")
+ res_sub_c = estfun(fml, base, subset = which(base$species == "setosa"))
+ res = estfun(fml, base[base$species == "setosa", ])
+ }
+
+ test(coef(res_sub_a), coef(res))
+ test(coef(res_sub_b), coef(res))
+ test(coef(res_sub_c), coef(res))
+ test(se(res_sub_c, cluster = "fe_bis"), se(res, cluster = "fe_bis"))
+ }
+ cat("|")
+ }
.....|.....|.....|.....|.....|.....|There were 12 warnings (use warnings() to see them)
> cat("\n")
>
>
> ####
> #### ... split ####
> ####
>
> chunk("split")
SPLIT
>
> base = setNames(iris, c("y", "x1", "x2", "x3", "species"))
>
> # simple: formula
> est = feols(y ~ x.[1:3], base, split = ~species %keep% "@^v")
> test(length(est), 2)
>
> est = feols(y ~ x.[1:3], base, fsplit = ~species %keep% c("set", "vers"))
> test(length(est), 3)
>
> est = feols(y ~ x.[1:3], base, split = ~species %drop% "set")
> test(length(est), 2)
>
> # simple: vector
> est = feols(y ~ x.[1:3], base, split = base$species %keep% "@^v")
> test(length(est), 2)
>
> est = feols(y ~ x.[1:3], base, split = base$species %keep% c("set", "vers"))
> test(length(est), 2)
>
> est = feols(y ~ x.[1:3], base, split = base$species %drop% "set")
> test(length(est), 2)
>
> # with bin
> est = feols(y ~ x.[1:2], base,
+ split = ~bin(x3, c("cut::5", "saint emilion", "pessac leognan",
+ "margaux", "saint julien", "entre deux mers")) %keep% c("saint e", "pe"))
> test(length(est), 2)
>
> est = feols(y ~ x.[1:2], base,
+ split = ~bin(x3, c("cut::5", "saint emilion", "pessac leognan", NA)) %drop% "@\\d")
> test(length(est), 2)
>
> # with argument
> est = feols(y ~ x.[1:3], base, split = ~species, split.keep = "@^v")
> test(length(est), 2)
>
> est = feols(y ~ x.[1:3], base, fsplit = ~species, split.keep = c("set", "vers"))
> test(length(est), 3)
>
> est = feols(y ~ x.[1:3], base, split = ~species, split.drop = "set")
> test(length(est), 2)
>
>
> ####
> #### ... Multiple estimations ####
> ####
>
> chunk("Multiple")
MULTIPLE
>
> set.seed(2)
> base = iris
> names(base) = c("y1", "x1", "x2", "x3", "species")
> base$y2 = 10 + rnorm(150) + 0.5 * base$x1
> base$x4 = rnorm(150) + 0.5 * base$y1
> base$fe2 = rep(letters[1:15], 10)
> base$fe2[50:51] = NA
> base$y2[base$fe2 == "a" & !is.na(base$fe2)] = 0
> base$x2[1:5] = NA
> base$x3[6] = NA
> base$x5 = rnorm(150)
> base$x6 = rnorm(150) + base$y1 * 0.25
> base$fe3 = rep(letters[1:10], 15)
>
>
> for(id_fun in 1:5){
+ estfun = switch(as.character(id_fun),
+ "1" = feols,
+ "2" = feglm,
+ "3" = fepois,
+ "4" = femlm,
+ "5" = feNmlm)
+
+ # Following weird bug ASAN on CRAN I cannot replicate, check 4/5 not performed on non Windows
+ if(Sys.info()["sysname"] != "Windows"){
+ if(id_fun %in% 4:5) next
+ }
+
+
+ est_multi = estfun(c(y1, y2) ~ x1 + sw(x2, x3), base, split = ~species)
+
+ k = 1
+ for(s in c("setosa", "versicolor", "virginica")){
+ for(lhs in c("y1", "y2")){
+ for(rhs in c("x2", "x3")){
+ res = estfun(.[lhs] ~ x1 + .[rhs], base[base$species == s, ], notes = FALSE)
+
+ test(coef(est_multi[[k]]), coef(res))
+ test(se(est_multi[[k]], cluster = "fe3"), se(res, cluster = "fe3"))
+ k = k + 1
+ }
+ }
+ }
+
+ cat("__")
+
+ est_multi = estfun(c(y1, y2) ~ x1 + csw0(x2, x3) + x4 | species + fe2, base, fsplit = ~species)
+ k = 1
+ all_rhs = c("", "x2", "x3")
+ for(s in c("all", "setosa", "versicolor", "virginica")){
+ for(lhs in c("y1", "y2")){
+ for(n_rhs in 1:3){
+ if(s == "all"){
+ res = estfun(xpd(..lhs ~ x1 + ..rhs + x4 | species + fe2, ..lhs = lhs, ..rhs = all_rhs[1:n_rhs]), base, notes = FALSE)
+ } else {
+ res = estfun(xpd(..lhs ~ x1 + ..rhs + x4 | species + fe2, ..lhs = lhs, ..rhs = all_rhs[1:n_rhs]), base[base$species == s, ], notes = FALSE)
+ }
+
+ vname = names(coef(res))
+ test(coef(est_multi[[k]])[vname], coef(res), "~" , 1e-6)
+ test(se(est_multi[[k]], cluster = "fe3")[vname], se(res, cluster = "fe3"), "~" , 1e-6)
+ k = k + 1
+ }
+ }
+ }
+
+ cat("|")
+ }
__|__|__|> cat("\n")
>
>
>
> # No error tests
> # We test with IV + possible corner cases
>
> base$left = rnorm(150)
> base$right = rnorm(150)
>
> est_multi = feols(c(y1, y2) ~ sw0(x1) | sw0(species) | x2 ~ x3, base)
>
> # We check a few
> est_a = feols(y1 ~ 1 | x2 ~ x3, base)
> est_b = feols(y1 ~ x1 | species | x2 ~ x3, base)
> est_c = feols(y2 ~ 1 | x2 ~ x3, base)
>
> test(coef(est_multi[lhs = "y1", rhs = "^1", fixef = "1", drop = TRUE]), coef(est_a))
> test(coef(est_multi[lhs = "y1", rhs = "x1", fixef = "spe", drop = TRUE]), coef(est_b))
> test(coef(est_multi[lhs = "y2", rhs = "^1", fixef = "1", drop = TRUE]), coef(est_c))
>
> # with fixed covariates
> est_multi_LR = feols(c(y1, y2) ~ left + sw0(x1*x4) + right | sw0(species) | x2 ~ x3, base)
>
> est_a = feols(y1 ~ left + right | x2 ~ x3, base)
> est_b = feols(y1 ~ left + x1*x4 + right | species | x2 ~ x3, base)
> est_c = feols(y2 ~ left + right | x2 ~ x3, base)
>
> test(coef(est_multi_LR[lhs = "y1", rhs = "!x1", fixef = "1", drop = TRUE]), coef(est_a))
> user_name = c("fit_x2", "left", "x1", "x4", "x1:x4", "right")
> test(names(coef(est_multi_LR[lhs = "y1", rhs = "x1", fixef = "spe", drop = TRUE])), user_name)
> test(coef(est_multi_LR[lhs = "y1", rhs = "x1", fixef = "spe", drop = TRUE]), coef(est_b)[user_name])
> test(coef(est_multi_LR[lhs = "y2", rhs = "!x1", fixef = "1", drop = TRUE]), coef(est_c))
>
>
> # mvsw
>
> est_mvsw = feols(y1 ~ mvsw(x1, x2), base)
> est_mvsw_fe = feols(y1 ~ mvsw(x1, x2) | mvsw(species, fe2), base)
> est_mvsw_fe_iv = feols(y1 ~ mvsw(x1, x2) | mvsw(species, fe2) | x3 ~ x4, base)
>
> test(length(est_mvsw), 4)
> test(length(as.list(est_mvsw_fe)), 16)
> test(length(as.list(est_mvsw_fe_iv)), 16)
>
> # Summary of multiple endo vars
> est_multi_iv = feols(c(y1, y2) ~ sw0(x1) | sw0(species) | x3 + x4 ~ x5 + x6, base)
> test(length(est_multi_iv), 8)
> test(length(summary(est_multi_iv, stage = 1)), 16)
>
> # IV without exo var:
> est_mult_no_exo = feols(c(y1, y2) ~ 0 | x3 + x4 ~ x5 + x6, base)
> est_no_exo_y2 = feols(y2 ~ 0 | x3 + x4 ~ x5 + x6, base)
> test(coef(est_mult_no_exo[[2]]), coef(est_no_exo_y2))
>
> # proper ordering
> est_multi = feols(c(y1, y2) ~ sw0(x1) | sw0(fe2), base, split = ~species)
> test(names(models(est_multi[fixef = TRUE, sample = FALSE])),
+ stvec("id, fixef, lhs, rhs, sample.var, sample"))
>
> test(names(models(est_multi[fixef = "fe2", sample = "seto"])),
+ stvec("id, fixef, sample.var, sample, lhs, rhs"))
>
> test(names(models(est_multi[fixef = "fe2", sample = "seto", reorder = FALSE])),
+ stvec("id, sample.var, sample, fixef, lhs, rhs"))
>
> # NA models
> base$y_0 = base$x1 ** 2 + rnorm(150)
> base$y_0[base$species == "setosa"] = 0
>
> est_pois = fepois(y_0 ~ csw(x.[,1:4]), base, split = ~species)
>
> base$x1_bis = base$x1
> est_pois = fepois(y_0 ~ x.[1:3] + x1_bis | sw0(species), base)
>
> # Different ways .[]
> base = setNames(iris, c("y", "x1", "x2", "x3", "species"))
>
> dep_all = list(stvec("y, x1, x2"), ~y + x1 + x2)
> for(dep in dep_all){
+ m = feols(.[dep] ~ x3, base)
+ test(length(m), 3)
+
+ m = feols(x3 ~ .[dep], base)
+ test(length(m$coefficients), 4)
+
+ m = feols(x3 ~ csw(.[,dep]), base)
+ test(length(m), 3)
+ }
>
> # offset in multiple outcomes // no error test
>
> offset_single_ols = feols(am ~ hp, offset = ~ log(qsec), data = mtcars)
> offset_mult_ols = feols(c(mpg, am) ~ hp, offset = ~ log(qsec), data = mtcars)
>
> test(coef(offset_mult_ols[[2]]), coef(offset_single_ols))
>
> offset_single_glm = feglm(am ~ hp, offset = ~ log(qsec), data = mtcars)
> offset_mult_glm = feglm(c(mpg, am) ~ hp, offset = ~ log(qsec), data = mtcars)
>
> test(coef(offset_mult_glm[[2]]), coef(offset_single_glm))
>
> # LHS expansion with IVs
>
> lhs = c("mpg", "wt")
> est_lhs = feols(.[lhs] ~ disp | hp ~ qsec, data = mtcars)
> test(length(est_lhs), 2)
>
> est_lhs = feols(..("mpg|wt") ~ disp | hp ~ qsec, data = mtcars)
> test(length(est_lhs), 2)
>
>
>
> ####
> #### ... IV ####
> ####
>
> chunk("IV")
IV
>
> base = iris
> names(base) = c("y", "x1", "x_endo_1", "x_inst_1", "fe")
> set.seed(2)
> base$x_inst_2 = 0.2 * base$y + 0.2 * base$x_endo_1 + rnorm(150, sd = 0.5)
> base$x_endo_2 = 0.2 * base$y - 0.2 * base$x_inst_1 + rnorm(150, sd = 0.5)
>
> # Checking a basic estimation
>
> setFixest_vcov(all = "iid")
>
> est_iv = feols(y ~ x1 | x_endo_1 + x_endo_2 ~ x_inst_1 + x_inst_2, base)
>
> res_f1 = feols(x_endo_1 ~ x1 + x_inst_1 + x_inst_2, base)
> res_f2 = feols(x_endo_2 ~ x1 + x_inst_1 + x_inst_2, base)
>
> base$fit_x_endo_1 = predict(res_f1)
> base$fit_x_endo_2 = predict(res_f2)
>
> res_2nd = feols(y ~ fit_x_endo_1 + fit_x_endo_2 + x1, base)
>
> # the coef
> test(coef(est_iv), coef(res_2nd))
>
> # the SE
> resid_iv = base$y - predict(res_2nd,
+ data.frame(x1 = base$x1, fit_x_endo_1 = base$x_endo_1,
+ fit_x_endo_2 = base$x_endo_2))
> sigma2_iv = sum(resid_iv**2) / (res_2nd$nobs - res_2nd$nparams)
>
> sum_2nd = summary(res_2nd, vcov = res_2nd$cov.iid / res_2nd$sigma2 * sigma2_iv)
>
> test(se(sum_2nd), se(est_iv))
>
> # check no bug when all exogenous vars are removed bc of collinearity
> df = data.frame(x = rnorm(8), y = rnorm(8),
+ z = rnorm(8), fe = rep(0:1, each = 4))
>
> est_iv = feols(y ~ fe | fe | x ~ z, df)
> est_iv = feols(y ~ sw(fe, fe) | fe | x ~ z, df)
>
> # check no bug
> etable(summary(est_iv, stage = 1:2))
summary(est_..1 summary(es..2 summary(est_..3 summary(es..4
Dependent Var.: x y x y
z 0.1127 (0.5532) 0.1127 (0.5532)
x 1.110 (6.499) 1.110 (6.499)
Fixed-Effects: --------------- ------------- --------------- -------------
fe Yes Yes Yes Yes
_______________ _______________ _____________ _______________ _____________
S.E. type IID IID IID IID
Observations 8 8 8 8
R2 0.35875 0.19449 0.35875 0.19449
Within R2 0.00823 0.00779 0.00823 0.00779
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
>
> setFixest_vcov(reset = TRUE)
>
> # Check the number of instruments
> test(
+ feols(y ~ x1 | x_endo_1 + x_endo_2 ~ x_inst_1, base),
+ "err"
+ )
>
>
> ####
> #### ... VCOV at estimation ####
> ####
>
> chunk("vcov at estimation")
VCOV AT ESTIMATION
>
> base = iris
> names(base) = c("y", "x1", "x2", "x3", "species")
> base$clu = sample(6, 150, TRUE)
> base$clu[1:5] = NA
>
> est = feols(y ~ x1 | species, base, cluster = ~clu, ssc = ssc(K.adj = FALSE))
>
> # The three should be identical
> v1 = est$cov.scaled
> v1b = vcov(est)
> v1c = summary(est)$cov.scaled
>
> test(v1, v1b)
> test(v1, v1c)
>
> # Only ssc change
> v2 = summary(est, ssc = ssc())$cov.scaled
> v2b = vcov(est, ssc = ssc())
>
> test(v2, v2b)
> test(max(abs(v1 - v2)) == 0, FALSE)
>
> # vcov change only
> v3 = summary(est, se = "hetero")$cov.scaled
> v3b = vcov(est, se = "hetero")
>
> test(v3, v3b)
> test(max(abs(v1 - v3)) == 0, FALSE)
> test(max(abs(v2 - v3)) == 0, FALSE)
>
> # feols.fit
>
> ymat = base$y
> xmat = base[, 2:3]
> fe = base$species
>
> for(use_fe in c(TRUE, FALSE)){
+ all_vcov = stvec("iid, hetero")
+ if(use_fe){
+ setFixest_fml(..fe = ~ 1 | species)
+ all_vcov = c(all_vcov, "cluster")
+ } else {
+ setFixest_fml(..fe = ~ 1)
+ }
+
+ for(v in all_vcov){
+
+ if(use_fe){
+ est_fit = feols.fit(ymat, xmat, fe, vcov = v)
+ } else {
+ est_fit = feols.fit(ymat, cbind(1, xmat), vcov = v)
+ }
+
+ est = feols(y ~ x1 + x2 + ..fe, base, vcov = v)
+
+ test(vcov(est), vcov(est_fit))
+ }
+ }
>
> ####
> #### ... Custom VCOV ####
> ####
>
> chunk("custom vcov")
CUSTOM VCOV
>
> # Named list stores string
> est <- feols(mpg ~ i(cyl), mtcars)
> vcov_HC3 <- sandwich::vcovHC(est, type = "HC3")
> est_HC3 <- summary(est, vcov = list("HC3" = vcov_HC3))
> test(attr(est_HC3$se, "type"), "HC3")
> est_tab <- etable(est_HC3)
> test(any(grepl("HC3", est_tab[[2]])), TRUE)
>
> # Passing functions
> test(
+ summary(est, vcov = function(x) sandwich::vcovHC(x, type = "HC3"))$se,
+ summary(est, vcov = sandwich::vcovHC, .vcov_args = list(type = "HC3"))$se
+ )
> # Confirming these work
> temp <- etable(est, vcov = function(x) sandwich::vcovHC(x, type = "HC3"))
> temp <- etable(est, vcov = sandwich::vcovHC, .vcov_args = list(type = "HC3"))
>
>
> # deprecated `.vcov` still works
> est_.vcov <- summary(est, .vcov = vcov_HC3)
Warning message:
In summary.fixest(est, .vcov = vcov_HC3) :
The argument '.vcov' is deprecated. Please use 'vcov' instead.
> test(est_.vcov$se, est_HC3$se)
> etable(est, .vcov = vcov_HC3)
est
Dependent Var.: mpg
Constant 26.66*** (0.9718)
cyl = 6 -6.921*** (1.558)
cyl = 8 -11.56*** (1.299)
_______________ _________________
S.E. type IID
Observations 32
R2 0.73246
Adj. R2 0.71401
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
>
> # Custom vcov to fixest multi
> est_multi <- feols(c(mpg, hp) ~ i(cyl), mtcars)
> vcovs_HC3 <- lapply(est_multi, function(est) sandwich::vcovHC(est, type = "HC3"))
> names(vcovs_HC3) <- c("HC3", "HC3")
> est_multi_HC3 <- summary(est_multi, vcov = vcovs_HC3)
>
> test(vcov(est_multi_HC3[[1]]), vcovs_HC3[[1]])
> test(vcov(est_multi_HC3[[2]]), vcovs_HC3[[2]])
>
> test(attr(est_multi_HC3[[1]]$se, "type"), "HC3")
> test(attr(est_multi_HC3[[2]]$se, "type"), "HC3")
>
> est_tab <- etable(est_multi, est_multi_HC3)
> test(any(grepl("HC3", est_tab$est_multi_HC3.1)), TRUE)
> test(any(grepl("HC3", est_tab$est_multi_HC3.2)), TRUE)
>
> # deprecated `.vcov` still works
> est_multi_.vcov <- summary(est_multi, .vcov = vcovs_HC3)
> test(est_multi_.vcov[[1]]$se, est_multi_HC3[[1]]$se)
>
>
>
> ####
> #### ... Argument sliding ####
> ####
>
> chunk("argument sliding")
ARGUMENT SLIDING
>
> base = setNames(iris, c("y", "x1", "x2", "x3", "species"))
>
> setFixest_estimation(data = base)
>
> raw = feols(y ~ x1 + x2, base, ~species)
Warning message:
The VCOV matrix is not positive semi-definite and was 'fixed' (see ?vcov).
> slided = feols(y ~ x1 + x2, ~species)
Warning message:
The VCOV matrix is not positive semi-definite and was 'fixed' (see ?vcov).
>
> test(coef(raw), coef(slided))
>
> # Error, with error msg relative to 'data'
> test(feols(y ~ x1 + x2, 1:5), "err")
>
> # should be another estimation
> other_est = feols(y ~ x1 + x2, head(base, 50))
> test(nobs(other_est), 50)
>
> setFixest_estimation(reset = TRUE)
>
> ####
> #### ... Offset ####
> ####
>
> chunk("offset")
OFFSET
>
> # we test the different ways to set an offset
>
> base = setNames(iris, c("y", "x1", "x2", "x3", "species"))
>
> o1 = feols(y ~ x1 + offset(x2) + offset(x3^2 + 3), base)
> o2 = feols(y ~ x1, base, offset = ~x2 + x3^2 + 3)
> test(coef(o1), coef(o2))
>
> test(predict(o1, newdata = head(base)),
+ predict(o2, newdata = head(base)))
>
> # error
> test(feols(y ~ x1 + offset(x2), base, offset = ~x3), "err")
>
>
> ####
> #### ... Only Coef ####
> ####
>
> chunk("only.coef")
ONLY.COEF
>
>
> base = setNames(iris, c("y", "x1", "x2", "x3", "species"))
> base$x4 = base$x1 + 5
>
> m = feols(y ~ x1 + x2 + x4, base, only.coef = TRUE)
> test(length(m), 4)
> test(sum(is.na(m)), 1)
>
> m = fepois(y ~ x1 + x2 + x4, base, only.coef = TRUE)
> test(length(m), 4)
> test(sum(is.na(m)), 1)
>
> m = femlm(y ~ x1 + x2, base, only.coef = TRUE)
> test(length(m), 3)
> test(sum(is.na(m)), 0)
>
> test(feols(y ~ sw(x1, x2), base, only.coef = TRUE), "err")
>
>
> ####
> #### Standard-errors ####
> ####
>
> chunk("STANDARD ERRORS")
STANDARD ERRORS
>
> #
> # Fixed-effects corrections
> #
>
> # We create "irregular" FEs
> set.seed(0)
> base = data.frame(x = rnorm(20))
> base$y = base$x + rnorm(20)
> base$fe1 = rep(rep(1:3, c(4, 3, 3)), 2)
> base$fe2 = rep(rep(1:5, each = 2), 2)
> est = feols(y ~ x | fe1 + fe2, base, vcov = "cluster")
>
> # fe1: 3 FEs
> # fe2: 5 FEs
>
> #
> # Clustered standard-errors: by fe1
> #
>
> # Default: K.fixef = "nonnested"
> # => adjustment K = 1 + 5 (i.e. x + fe2)
> test(attr(vcov(est, ssc = ssc(K.fixef = "nonnested"), attr = TRUE), "df.K"), 6)
>
> # K.fixef = FALSE
> # => adjustment K = 1 (i.e. only x)
> test(attr(vcov(est, ssc = ssc(K.fixef = "none"), attr = TRUE), "df.K"), 1)
>
> # K.fixef = TRUE
> # => adjustment K = 1 + 3 + 5 - 1 (i.e. x + fe1 + fe2 - 1 restriction)
> test(attr(vcov(est, ssc = ssc(K.fixef = "full"), attr = TRUE), "df.K"), 8)
>
> # K.fixef = TRUE & fixef.exact = TRUE
> # => adjustment K = 1 + 3 + 5 - 2 (i.e. x + fe1 + fe2 - 2 restrictions)
> test(attr(vcov(est, ssc = ssc(K.fixef = "full", K.exact = TRUE), attr = TRUE), "df.K"), 7)
>
> #
> # Manual checks of the SEs
> #
>
> n = est$nobs
> VCOV_raw = est$cov.iid / ((n - 1) / (n - est$nparams))
>
> # standard
> for(k_val in c("none", "nonnested", "full")){
+ for(adj in c(FALSE, TRUE)){
+
+ K = switch(k_val, none = 1, nonnested = 8, full = 8)
+ my_adj = ifelse(adj, (n - 1) / (n - K), 1)
+
+ test(vcov(est, se = "standard", ssc = ssc(K.adj = adj, K.fixef = k_val)), VCOV_raw * my_adj)
+
+ # cat("adj = ", adj, " ; K.fixef = ", k_val, "\n", sep = "")
+ }
+ }
>
> # Clustered, fe1
> VCOV_raw = est$cov.iid / est$sigma2
> H = vcovClust(est$fixef_id$fe1, VCOV_raw, scores = est$scores, adj = FALSE)
> n = nobs(est)
>
> for(tdf in c("conventional", "min")){
+ for(k_val in c("none", "nonnested", "full")){
+ for(c_adj in c(FALSE, TRUE)){
+ for(adj in c(FALSE, TRUE)){
+
+ K = switch(k_val, none = 1, nonnested = 6, full = 8)
+ cluster_factor = ifelse(c_adj, 3/2, 1)
+ df = ifelse(tdf == "min", 2, 20 - K)
+ my_adj = ifelse(adj, (n - 1) / (n - K), 1)
+
+ V = H * cluster_factor
+
+ # test SE
+ test(vcov(est, se = "cluster", ssc = ssc(K.adj = adj, K.fixef = k_val, G.adj = c_adj)), V * my_adj)
+
+ # test pvalue
+ my_tstat = tstat(est, se = "cluster", ssc = ssc(K.adj = adj, K.fixef = k_val, G.adj = c_adj))
+ test(pvalue(est, se = "cluster", ssc = ssc(K.adj = adj, K.fixef = k_val, G.adj = c_adj, t.df = tdf)), 2*pt(-abs(my_tstat), df))
+
+ # cat("adj = ", adj, " ; K.fixef = ", k_val, " ; G.adj = ", c_adj, " t.df = ", tdf, "\n", sep = "")
+ }
+ }
+ }
+ }
>
>
> # 2-way Clustered, fe1 fe2
> VCOV_raw = est$cov.iid / est$sigma2
> M_i = vcovClust(est$fixef_id$fe1, VCOV_raw, scores = est$scores, adj = FALSE)
> M_t = vcovClust(est$fixef_id$fe2, VCOV_raw, scores = est$scores, adj = FALSE)
> M_it = vcovClust(paste(base$fe1, base$fe2), VCOV_raw, scores = est$scores, adj = FALSE, do.unclass = TRUE)
>
> M_i + M_t - M_it
[,1]
[1,] 0.005391594
> vcov(est, se = "two", ssc = ssc(K.adj = FALSE, G.adj = FALSE))
x
x 0.005391594
>
> for(cdf in c("conventional", "min")){
+ for(tdf in c("conventional", "min")){
+ for(k_val in c("none", "nonnested", "full")){
+ for(c_adj in c(FALSE, TRUE)){
+ for(adj in c(FALSE, TRUE)){
+
+ K = switch(k_val, none = 1, nonnested = 2, full = 8)
+
+ if(c_adj){
+ if(cdf == "min"){
+ V = (M_i + M_t - M_it) * 3/2
+ } else {
+ V = M_i * 3/2 + M_t * 5/4 - M_it * 6/5
+ }
+ } else {
+ V = M_i + M_t - M_it
+ }
+
+ df = ifelse(tdf == "min", 2, 20 - K)
+ my_adj = ifelse(adj, (n - 1) / (n - K), 1)
+
+ # test SE
+ test(vcov(est, se = "two", ssc = ssc(K.adj = adj, K.fixef = k_val, G.adj = c_adj, G.df = cdf)),
+ V * my_adj)
+
+ # test pvalue
+ my_tstat = tstat(est, se = "two", ssc = ssc(K.adj = adj, K.fixef = k_val, G.adj = c_adj, G.df = cdf))
+ test(pvalue(est, se = "two", ssc = ssc(K.adj = adj, K.fixef = k_val, G.adj = c_adj, G.df = cdf, t.df = tdf)),
+ 2*pt(-abs(my_tstat), df))
+
+ # cat("adj = ", adj, " ; K.fixef = ", k_val, " ; G.adj = ", c_adj, " t.df = ", tdf, "\n", sep = "")
+ }
+ }
+ }
+ }
+ }
>
>
> #
> # Comparison with sandwich and plm
> #
>
> # Data generation
> set.seed(0)
> N = 20 ; G = N / 5 ; T = N / G
> d = data.frame(y = rnorm(N), x = rnorm(N), grp = rep(1:G, T), tm = rep(1:T, each = G))
>
> # Estimations
> est_lm = lm(y ~ x + as.factor(grp) + as.factor(tm), data=d)
> est_feols = feols(y ~ x | grp + tm, data = d, vcov = "cluster")
>
> #
> # Standard
> #
>
> test(se(est_feols, se = "st")["x"], se(est_lm)["x"])
>
> #
> # Heteroskedasticity-robust
> #
>
> se_white_lm_HC1 = sqrt(vcovHC(est_lm, type = "HC1")["x", "x"])
> se_white_lm_HC0 = sqrt(vcovHC(est_lm, type = "HC0")["x", "x"])
>
> test(se(est_feols, se = "hetero"), se_white_lm_HC1)
> test(se(est_feols, se = "hetero", ssc = ssc(K.adj = FALSE, G.adj = FALSE)), se_white_lm_HC0)
>
>
> #
> # Clustered
> #
>
> # Clustered by grp
> se_CL_grp_lm_HC1 = sqrt(vcovCL(est_lm, cluster = d$grp, type = "HC1")["x", "x"])
> se_CL_grp_lm_HC0 = sqrt(vcovCL(est_lm, cluster = d$grp, type = "HC0")["x", "x"])
>
> # How to get the lm
> test(se(est_feols, ssc = ssc(K.fixef = "full")), se_CL_grp_lm_HC1)
> test(se(est_feols, ssc = ssc(K.adj = FALSE, K.fixef = "full")), se_CL_grp_lm_HC0)
>
> #
> # Two way
> #
>
> # Clustered by grp & tm
> se_CL_2w_lm = sqrt(vcovCL(est_lm, cluster = ~ grp + tm, type = "HC1")["x", "x"])
> se_CL_2w_feols = se(est_feols, se = "twoway")
>
> test(se(est_feols, se = "twoway", ssc = ssc(K.fixef = "full", G.df = "conv")), se_CL_2w_lm)
>
> #
> # HC2/HC3
> #
>
> # HC2/HC3
> base = iris
> base$w = runif(nrow(base))
>
> est_feols = feols(Sepal.Length ~ Sepal.Width | Species, base)
> est_lm = lm(Sepal.Length ~ Sepal.Width + factor(Species), base)
>
> test(
+ vcov(est_feols, "hc2", ssc = ssc(K.adj = FALSE, G.adj = FALSE)),
+ sandwich::vcovHC(est_lm, "HC2")["Sepal.Width", "Sepal.Width"]
+ )
>
> test(
+ vcov(est_feols, "hc3", ssc = ssc(K.adj = FALSE, G.adj = FALSE)),
+ sandwich::vcovHC(est_lm, "HC3")["Sepal.Width", "Sepal.Width"]
+ )
>
> # HC2/HC3 with weights
> base = iris
> base$w = runif(nrow(base))
>
> est_feols = feols(Sepal.Length ~ Sepal.Width | Species, base, weights = ~ w)
> est_lm = lm(Sepal.Length ~ Sepal.Width + factor(Species), base, weights = base$w)
>
> test(
+ vcov(est_feols, "hc2", ssc = ssc(K.adj = FALSE, G.adj = FALSE)),
+ sandwich::vcovHC(est_lm, "HC2")["Sepal.Width", "Sepal.Width"]
+ )
>
> test(
+ vcov(est_feols, "hc3", ssc = ssc(K.adj = FALSE, G.adj = FALSE)),
+ sandwich::vcovHC(est_lm, "HC3")["Sepal.Width", "Sepal.Width"]
+ )
>
> # HC2/HC3 with GLM
> base$Sepal.Length = floor(base$Sepal.Length)
> est_feglm = feglm(
+ Sepal.Length ~ Sepal.Width | Species, base,
+ "poisson", weights = ~ w
+ )
> est_glm = glm(
+ Sepal.Length ~ Sepal.Width + factor(Species), base,
+ family = poisson(), weights = base$w
+ )
>
> test(
+ vcov(est_feglm, "hc2", ssc = ssc(K.adj = FALSE, G.adj = FALSE)),
+ sandwich::vcovHC(est_glm, "HC2")["Sepal.Width", "Sepal.Width"]
+ )
> test(
+ vcov(est_feglm, "hc3", ssc = ssc(K.adj = FALSE, G.adj = FALSE)),
+ sandwich::vcovHC(est_glm, "HC3")["Sepal.Width", "Sepal.Width"]
+ )
>
> # Fail when P_ii = 1
> base$Species = as.character(base$Species)
> base[1, "Species"] = "foo"
>
> est_pii_singular = feols(Sepal.Length ~ Sepal.Width | Species, base, fixef.rm = "none")
>
> test(vcov(est_pii_singular, "hc2"), "err")
>
> #
> # Newey-west + Driscoll-Kraay
> #
>
> if(requireNamespace("plm", quietly = TRUE)){
+
+ # library(plm)
+ data(Grunfeld, package = "plm")
+
+ est_panel_plm = plm::plm(inv ~ capital + as.factor(year), Grunfeld,
+ index = c("firm", "year"), model = "within")
+ est_panel_feols = feols(inv ~ capital | firm + year, Grunfeld,
+ panel.id = ~firm + year)
+
+ # NW
+ se_plm_NW = se(plm::vcovNW(est_panel_plm))["capital"]
+ test(se(est_panel_feols, vcov = NW ~ ssc(adj = FALSE, cluster.adj = FALSE)), se_plm_NW)
+
+ # DK
+ se_plm_DK = se(plm::vcovSCC(est_panel_plm))["capital"]
+ test(se(est_panel_feols, vcov = DK ~ ssc(adj = FALSE, cluster.adj = FALSE)), se_plm_DK)
+ }
>
> #
> # Checking the calls work properly
> #
>
> data(trade)
>
> est_pois = femlm(Euros ~ log(dist_km)|Origin+Destination, trade)
>
> se_clust = se(est_pois, se = "cluster", cluster = "Product")
> test(se(est_pois, cluster = trade$Product), se_clust)
> test(se(est_pois, cluster = ~Product), se_clust)
>
> se_two = se(est_pois, se = "twoway", cluster = trade[, c("Product", "Destination")])
> test(se_two, se(est_pois, cluster = c("Product", "Destination")))
> test(se_two, se(est_pois, cluster = ~Product+Destination))
>
> se_clu_comb = se(est_pois, cluster = "Product^Destination")
> test(se_clu_comb, se(est_pois, cluster = paste(trade$Product, trade$Destination)))
> test(se_clu_comb, se(est_pois, cluster = ~Product^Destination))
>
> se_two_comb = se(est_pois, cluster = c("Origin^Destination", "Product"))
> test(se_two_comb, se(est_pois, cluster = list(paste(trade$Origin, trade$Destination),
+ trade$Product)))
> test(se_two_comb, se(est_pois, cluster = ~Origin^Destination + Product))
>
> # With cluster removed
> base = trade
> base$Euros[base$Origin == "FR"] = 0
> est_pois = femlm(Euros ~ log(dist_km)|Origin+Destination, base)
*** caught segfault ***
address 0x4821, cause 'memory not mapped'
Traceback:
1: cpp_crossprod(jacob.mat, ll_d2, nthreads)
2: hessian(.par, ...)
3: stats::nlminb(start = start, objective = femlm_ll, env = env, lower = lower, upper = upper, gradient = gradient, hessian = hessian, control = opt.control)
4: doTryCatch(return(expr), name, parentenv, handler)
5: tryCatchOne(expr, names, parentenv, handlers[[1L]])
6: tryCatchList(expr, classes, parentenv, handlers)
7: tryCatch(expr, error = function(e) { call <- conditionCall(e) if (!is.null(call)) { if (identical(call[[1L]], quote(doTryCatch))) call <- sys.call(-4L) dcall <- deparse(call, nlines = 1L) prefix <- paste("Error in", dcall, ": ") LONG <- 75L sm <- strsplit(conditionMessage(e), "\n")[[1L]] w <- 14L + nchar(dcall, type = "w") + nchar(sm[1L], type = "w") if (is.na(w)) w <- 14L + nchar(dcall, type = "b") + nchar(sm[1L], type = "b") if (w > LONG) prefix <- paste0(prefix, "\n ") } else prefix <- "Error : " msg <- paste0(prefix, conditionMessage(e), "\n") .Internal(seterrmessage(msg[1L])) if (!silent && isTRUE(getOption("show.error.messages"))) { cat(msg, file = outFile) .Internal(printDeferredWarnings()) } invisible(structure(msg, class = "try-error", condition = e))})
8: try(stats::nlminb(start = start, objective = femlm_ll, env = env, lower = lower, upper = upper, gradient = gradient, hessian = hessian, control = opt.control), silent = TRUE)
9: feNmlm(fml = fml, data = data, family = family, fixef = fixef, fixef.rm = fixef.rm, offset = offset, subset = subset, split = split, fsplit = fsplit, split.keep = split.keep, split.drop = split.drop, vcov = vcov, cluster = cluster, se = se, ssc = ssc, panel.id = panel.id, panel.time.step = panel.time.step, panel.duplicate.method = panel.duplicate.method, start = start, fixef.tol = fixef.tol, fixef.iter = fixef.iter, nthreads = nthreads, lean = lean, verbose = verbose, warn = warn, notes = notes, theta.init = theta.init, fixef.keep_names = fixef.keep_names, mem.clean = mem.clean, origin_bis = "femlm", mc_origin_bis = match.call(), call_env_bis = call_env_bis, only.env = only.env, only.coef = only.coef, data.save = data.save, env = env, ...)
10: doTryCatch(return(expr), name, parentenv, handler)
11: tryCatchOne(expr, names, parentenv, handlers[[1L]])
12: tryCatchList(expr, classes, parentenv, handlers)
13: tryCatch(expr, error = function(e) { call <- conditionCall(e) if (!is.null(call)) { if (identical(call[[1L]], quote(doTryCatch))) call <- sys.call(-4L) dcall <- deparse(call, nlines = 1L) prefix <- paste("Error in", dcall, ": ") LONG <- 75L sm <- strsplit(conditionMessage(e), "\n")[[1L]] w <- 14L + nchar(dcall, type = "w") + nchar(sm[1L], type = "w") if (is.na(w)) w <- 14L + nchar(dcall, type = "b") + nchar(sm[1L], type = "b") if (w > LONG) prefix <- paste0(prefix, "\n ") } else prefix <- "Error : " msg <- paste0(prefix, conditionMessage(e), "\n") .Internal(seterrmessage(msg[1L])) if (!silent && isTRUE(getOption("show.error.messages"))) { cat(msg, file = outFile) .Internal(printDeferredWarnings()) } invisible(structure(msg, class = "try-error", condition = e))})
14: try(feNmlm(fml = fml, data = data, family = family, fixef = fixef, fixef.rm = fixef.rm, offset = offset, subset = subset, split = split, fsplit = fsplit, split.keep = split.keep, split.drop = split.drop, vcov = vcov, cluster = cluster, se = se, ssc = ssc, panel.id = panel.id, panel.time.step = panel.time.step, panel.duplicate.method = panel.duplicate.method, start = start, fixef.tol = fixef.tol, fixef.iter = fixef.iter, nthreads = nthreads, lean = lean, verbose = verbose, warn = warn, notes = notes, theta.init = theta.init, fixef.keep_names = fixef.keep_names, mem.clean = mem.clean, origin_bis = "femlm", mc_origin_bis = match.call(), call_env_bis = call_env_bis, only.env = only.env, only.coef = only.coef, data.save = data.save, env = env, ...), silent = TRUE)
15: femlm(Euros ~ log(dist_km) | Origin + Destination, base)
An irrecoverable exception occurred. R is aborting now ...
Segmentation fault
- checking for unstated dependencies in vignettes ... OK
- checking package vignettes ... OK
- checking re-building of vignette outputs ... [37s/59s] OK
- checking PDF version of manual ... [25s/32s] OK
- checking HTML version of manual ... [11s/16s] OK
- checking for non-standard things in the check directory ... OK
- DONE
Status: 1 ERROR