Marginal Effects

Warning

THIS IS A DRAFT.

library(tidyverse)
library(polisciols)
library(modelsummary)
library(marginaleffects)
library(broom)
# Regress net FDI inflows on the PCJ indicator and GDP per capita.
m <- lm(net_fdi_inflows ~ pcj + gdp_per_capita, data = ebj)

# Regression table with readable coefficient labels and significance stars.
# Spell out TRUE: `T` is an ordinary variable that can be reassigned.
modelsummary(
  m,
  coef_rename = c(
    "pcjPCJ institutions" = "PCJ institutions established",
    "gdp_per_capita" = "GDP per capita (current USD)"
  ),
  stars = TRUE
)
 (1)
(Intercept) −319.173
(396.052)
PCJ institutions established 1673.288*
(714.015)
GDP per capita (current USD) 0.318**
(0.105)
Num.Obs. 95
R2 0.142
R2 Adj. 0.124
AIC 1777.5
BIC 1787.7
Log.Lik. −884.742
F 7.638
RMSE 2681.52
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001

Our model is as follows:

\[ Net\ FDI\ inflows_i = -319.173 + 1673.288 PCJ_i + 0.318 GDP\ per\ capita_i + \epsilon_i \]

So, what is the estimated effect of a state establishing post-conflict justice (PCJ) institutions on its net FDI inflows?

Average values approach

We will use our model to predict a country’s net FDI inflows when pcj = 0 and when pcj = 1 and then find the difference between these predictions. But, we need to make a decision about how we will handle our other independent variable: GDP per capita.

The average values approach uses the average of all countries’ observed GDP per capita. So, when the country has not established a PCJ institution, we are simply finding:

\[ \widehat{Net\ FDI\ inflows} = -319.173 + 1673.288 \times 0 + 0.318 \times \overline{GDP\ per\ capita} \]

In R, first find the average GDP per capita across our countries:

# Mean GDP per capita across every row of ebj.
avg_gdp_per_cap <- ebj |>
  pull(gdp_per_capita) |>
  mean()

# Display the mean formatted as a dollar amount.
avg_gdp_per_cap |>
  scales::dollar()
[1] "$2,390.53"

Second, build out the hypothetical countries we will compare:

# Two hypothetical countries that differ only in PCJ status; both are held at
# the average GDP per capita.
pred_df <- tibble(
  pcj = factor(
    0:1,
    levels = 0:1,
    labels = c("No institutions", "PCJ institutions")
  ),
  gdp_per_capita = avg_gdp_per_cap
)

pred_df
# A tibble: 2 × 2
  pcj              gdp_per_capita
  <fct>                     <dbl>
1 No institutions           2391.
2 PCJ institutions          2391.

Third, use our model to predict the net FDI for these hypothetical countries:

# Tidy the model once, then look up each coefficient estimate by its term name.
coef_tbl <- tidy(m)

beta_0 <- coef_tbl$estimate[coef_tbl$term == "(Intercept)"]
beta_pcj <- coef_tbl$estimate[coef_tbl$term == "pcjPCJ institutions"]
beta_gdppc <- coef_tbl$estimate[coef_tbl$term == "gdp_per_capita"]

# as.numeric() on the two-level factor yields codes 1 and 2; subtracting 1
# recovers the 0/1 dummy that the PCJ coefficient multiplies.
comparison <- mutate(
  pred_df,
  .fitted = beta_0 +
    beta_pcj * (as.numeric(pcj) - 1) +
    beta_gdppc * gdp_per_capita
)

comparison
# A tibble: 2 × 3
  pcj              gdp_per_capita .fitted
  <fct>                     <dbl>   <dbl>
1 No institutions           2391.    442.
2 PCJ institutions          2391.   2115.

Finally, calculate the difference:

# Row 2 minus row 1: the PCJ prediction less the no-PCJ prediction.
mutate(comparison, diff = .fitted - lag(.fitted))
# A tibble: 2 × 4
  pcj              gdp_per_capita .fitted  diff
  <fct>                     <dbl>   <dbl> <dbl>
1 No institutions           2391.    442.   NA 
2 PCJ institutions          2391.   2115. 1673.
Tip

Alternatively, you can use base R’s predict:

# Predict net FDI inflows for the two hypothetical countries.
comparison_pred <- predict(m, newdata = pred_df)
print(comparison_pred)
        1         2 
 442.1015 2115.3899 
# Second prediction minus the first (PCJ minus no PCJ).
diff(comparison_pred)
       2 
1673.288 

Or (my favorite) broom::augment():

# Attach the model's fitted values to the hypothetical countries, then take
# the row-to-row difference.
pred_aug <- augment(m, newdata = pred_df)
mutate(pred_aug, diff = .fitted - lag(.fitted))
# A tibble: 2 × 4
  pcj              gdp_per_capita .fitted  diff
  <fct>                     <dbl>   <dbl> <dbl>
1 No institutions           2391.    442.   NA 
2 PCJ institutions          2391.   2115. 1673.

More succinctly, you can use marginaleffects::avg_comparisons():

# Average contrast in predicted net FDI inflows between the two pcj levels.
avg_comparisons(model = m, variables = "pcj")

 Term                           Contrast Estimate Std. Error    z Pr(>|z|)   S
  pcj PCJ institutions - No institutions     1673        714 2.34   0.0191 5.7
 2.5 % 97.5 %
   274   3073

Columns: term, contrast, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high 
Type:  response 

More details on the mechanics of this function can be found here.

Observed values approach

First, adjust your data so that all observations are set to each value of interest.

# Counterfactual copy of the data: every observation is set to
# "No institutions" and keeps its observed GDP per capita.
ebj_no_pcj <- ebj |>
  mutate(
    pcj = factor(
      0,
      levels = 0:1,
      labels = c("No institutions", "PCJ institutions")
    )
  ) |>
  select(country_name, pcj, gdp_per_capita)

head(ebj_no_pcj)
       country_name             pcj gdp_per_capita
1             Haiti No institutions       1182.498
2             Haiti No institutions       1088.680
3 Trinidad & Tobago No institutions       7742.736
4            Mexico No institutions       6894.704
5            Mexico No institutions       7780.053
6         Guatemala No institutions       3061.873
# Counterfactual copy of the data: every observation is set to
# "PCJ institutions" and keeps its observed GDP per capita.
ebj_pcj <- ebj |>
  mutate(
    pcj = factor(
      1,
      levels = 0:1,
      labels = c("No institutions", "PCJ institutions")
    )
  ) |>
  select(country_name, pcj, gdp_per_capita)

head(ebj_pcj)
       country_name              pcj gdp_per_capita
1             Haiti PCJ institutions       1182.498
2             Haiti PCJ institutions       1088.680
3 Trinidad & Tobago PCJ institutions       7742.736
4            Mexico PCJ institutions       6894.704
5            Mexico PCJ institutions       7780.053
6         Guatemala PCJ institutions       3061.873

Next, use the model to predict your outcome for every observation:

# Predicted net FDI inflows for every observation under "No institutions".
# (as.numeric(pcj) - 1) converts the factor codes 1/2 into the 0/1 dummy.
pred_no_pcj <- mutate(
  ebj_no_pcj,
  .fitted = beta_0 +
    beta_pcj * (as.numeric(pcj) - 1) +
    beta_gdppc * gdp_per_capita
)

head(pred_no_pcj)
       country_name             pcj gdp_per_capita    .fitted
1             Haiti No institutions       1182.498   57.39903
2             Haiti No institutions       1088.680   27.52235
3 Trinidad & Tobago No institutions       7742.736 2146.53873
4            Mexico No institutions       6894.704 1876.47876
5            Mexico No institutions       7780.053 2158.42248
6         Guatemala No institutions       3061.873  655.89514
# Predicted net FDI inflows for every observation under "PCJ institutions".
# (as.numeric(pcj) - 1) converts the factor codes 1/2 into the 0/1 dummy.
pred_pcj <- mutate(
  ebj_pcj,
  .fitted = beta_0 +
    beta_pcj * (as.numeric(pcj) - 1) +
    beta_gdppc * gdp_per_capita
)

head(pred_pcj)
       country_name              pcj gdp_per_capita  .fitted
1             Haiti PCJ institutions       1182.498 1730.687
2             Haiti PCJ institutions       1088.680 1700.811
3 Trinidad & Tobago PCJ institutions       7742.736 3819.827
4            Mexico PCJ institutions       6894.704 3549.767
5            Mexico PCJ institutions       7780.053 3831.711
6         Guatemala PCJ institutions       3061.873 2329.184
Note

We now know our model’s predicted net FDI inflow for every country when it has and has not established a PCJ institution.

For example:

# Stack both counterfactual prediction sets and keep one country's rows.
pred_tt <- bind_rows(pred_no_pcj, pred_pcj) |>
  filter(country_name == "Trinidad & Tobago")

pred_tt
       country_name              pcj gdp_per_capita  .fitted
1 Trinidad & Tobago  No institutions       7742.736 2146.539
2 Trinidad & Tobago PCJ institutions       7742.736 3819.827

What is the estimated effect for this country of having a PCJ institution?

# PCJ prediction minus no-PCJ prediction for this country.
mutate(pred_tt, diff = .fitted - lag(.fitted))
       country_name              pcj gdp_per_capita  .fitted     diff
1 Trinidad & Tobago  No institutions       7742.736 2146.539       NA
2 Trinidad & Tobago PCJ institutions       7742.736 3819.827 1673.288

Next, find the average predicted net FDI inflows under each scenario:

# Average the fitted values within each PCJ scenario.
avg_effect <- bind_rows(pred_no_pcj, pred_pcj) |>
  group_by(pcj) |>
  summarise(avg_fitted = mean(.fitted))

avg_effect
# A tibble: 2 × 2
  pcj              avg_fitted
  <fct>                 <dbl>
1 No institutions        442.
2 PCJ institutions      2115.

Finally, find the difference between these average predictions:

# Average PCJ prediction minus average no-PCJ prediction.
mutate(avg_effect, diff = avg_fitted - lag(avg_fitted))
# A tibble: 2 × 3
  pcj              avg_fitted  diff
  <fct>                 <dbl> <dbl>
1 No institutions        442.   NA 
2 PCJ institutions      2115. 1673.