library(tidyverse)
library(polisciols)
library(modelsummary)
library(marginaleffects)
library(broom)
Marginal Effects
THIS IS A DRAFT.
# Regress net FDI inflows on PCJ status, controlling for GDP per capita.
m <- lm(net_fdi_inflows ~ pcj + gdp_per_capita, data = ebj)

# Regression table with readable coefficient labels and significance stars.
modelsummary(
  m,
  coef_rename = c(
    "pcjPCJ institutions" = "PCJ institutions established",
    "gdp_per_capita" = "GDP per capita (current USD)"
  ),
  stars = TRUE
)
(1) | |
---|---|
(Intercept) | −319.173 |
(396.052) | |
PCJ institutions established | 1673.288* |
(714.015) | |
GDP per capita (current USD) | 0.318** |
(0.105) | |
Num.Obs. | 95 |
R2 | 0.142 |
R2 Adj. | 0.124 |
AIC | 1777.5 |
BIC | 1787.7 |
Log.Lik. | −884.742 |
F | 7.638 |
RMSE | 2681.52 |
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001 |
Our model is as follows:
\[ Net\ FDI\ inflows_i = −319.173 + 1673.288 PCJ_i + 0.318 GDP\ per\ capita_i + \epsilon_i \]
So, what is the estimated effect of a state establishing post-conflict justice (PCJ) institutions on its net FDI inflows?
Average values approach
We will use our model to predict a country’s net FDI inflows when pcj = 0
and when pcj = 1
and then find the difference between these predictions. But we need to make a decision about how we will handle our other independent variable: GDP per capita.
The average values approach uses the average of all countries’ observed GDP per capita. So, when the country has not established a PCJ institution, we are simply finding:
\[ \hat{Net\ FDI\ inflows} = −319.173 + 1673.288*0 + 0.318*\bar{GDP\ per\ capita} \]
In R, first find the average GDP per capita across our countries:
# Average observed GDP per capita across all rows of ebj.
avg_gdp_per_cap <- mean(ebj$gdp_per_capita)

# Print it formatted as a dollar amount.
scales::dollar(avg_gdp_per_cap)
[1] "$2,390.53"
Second, build out the hypothetical countries we will compare:
# Two hypothetical countries that differ only in PCJ status; both are held
# at the average GDP per capita.
pred_df <- tibble(
  pcj = factor(c(0, 1), labels = c("No institutions", "PCJ institutions")),
  gdp_per_capita = avg_gdp_per_cap
)

pred_df
# A tibble: 2 × 2
pcj gdp_per_capita
<fct> <dbl>
1 No institutions 2391.
2 PCJ institutions 2391.
Third, use our model to predict the net FDI for these hypothetical countries:
# Pull each estimated coefficient out of the tidied model table.
beta_0 <- tidy(m) |>
  filter(term == "(Intercept)") |>
  pull(estimate)

beta_pcj <- tidy(m) |>
  filter(term == "pcjPCJ institutions") |>
  pull(estimate)

beta_gdppc <- tidy(m) |>
  filter(term == "gdp_per_capita") |>
  pull(estimate)

# Apply the fitted equation by hand. as.numeric(pcj) - 1 converts the
# two-level factor (codes 1 and 2) into a 0/1 indicator.
comparison <- pred_df |>
  mutate(
    .fitted = beta_0 +
      beta_pcj * (as.numeric(pcj) - 1) +
      beta_gdppc * gdp_per_capita
  )

comparison
# A tibble: 2 × 3
pcj gdp_per_capita .fitted
<fct> <dbl> <dbl>
1 No institutions 2391. 442.
2 PCJ institutions 2391. 2115.
Finally, calculate the difference:
# Row 2 minus row 1: prediction with PCJ institutions minus prediction without.
comparison |>
  mutate(diff = .fitted - lag(.fitted))
# A tibble: 2 × 4
pcj gdp_per_capita .fitted diff
<fct> <dbl> <dbl> <dbl>
1 No institutions 2391. 442. NA
2 PCJ institutions 2391. 2115. 1673.
Alternatively, you can use base R’s predict:
# Let predict() apply the fitted model to the two hypothetical countries.
comparison_pred <- predict(m, pred_df)

comparison_pred
1 2
442.1015 2115.3899
# Second prediction (PCJ institutions) minus the first (no institutions).
comparison_pred[2] - comparison_pred[1]
2
1673.288
Or (my favorite) broom::augment():
# Attach predictions for the hypothetical countries to pred_df, then take
# the difference between the second row and the first.
m |>
  augment(newdata = pred_df) |>
  mutate(diff = .fitted - lag(.fitted))
# A tibble: 2 × 4
pcj gdp_per_capita .fitted diff
<fct> <dbl> <dbl> <dbl>
1 No institutions 2391. 442. NA
2 PCJ institutions 2391. 2115. 1673.
More succinctly, you can use marginaleffects::avg_comparisons():
# Average contrast in the model's predictions between the two levels of pcj
# (reported in the output as "PCJ institutions - No institutions").
avg_comparisons(m, variables = "pcj")
Term Contrast Estimate Std. Error z Pr(>|z|) S
pcj PCJ institutions - No institutions 1673 714 2.34 0.0191 5.7
2.5 % 97.5 %
274 3073
Columns: term, contrast, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high
Type: response
More details on the mechanics of this function can be found here.
Observed values approach
First, adjust your data so that all observations are set to each value of interest.
# Counterfactual copy of the data: every observation is set to
# "No institutions"; GDP per capita keeps its observed value.
ebj_no_pcj <- ebj |>
  transmute(
    country_name,
    pcj = factor(
      0,
      levels = 0:1,
      labels = c("No institutions", "PCJ institutions")
    ),
    gdp_per_capita
  )

head(ebj_no_pcj)
country_name pcj gdp_per_capita
1 Haiti No institutions 1182.498
2 Haiti No institutions 1088.680
3 Trinidad & Tobago No institutions 7742.736
4 Mexico No institutions 6894.704
5 Mexico No institutions 7780.053
6 Guatemala No institutions 3061.873
# Counterfactual copy of the data: every observation is set to
# "PCJ institutions"; GDP per capita keeps its observed value.
ebj_pcj <- ebj |>
  transmute(
    country_name,
    pcj = factor(
      1,
      levels = 0:1,
      labels = c("No institutions", "PCJ institutions")
    ),
    gdp_per_capita
  )

head(ebj_pcj)
country_name pcj gdp_per_capita
1 Haiti PCJ institutions 1182.498
2 Haiti PCJ institutions 1088.680
3 Trinidad & Tobago PCJ institutions 7742.736
4 Mexico PCJ institutions 6894.704
5 Mexico PCJ institutions 7780.053
6 Guatemala PCJ institutions 3061.873
Next, use the model to predict your outcome for every observation:
# Predicted net FDI inflows for every observation without PCJ institutions.
# as.numeric(pcj) - 1 converts the factor codes (1, 2) into a 0/1 indicator.
pred_no_pcj <- ebj_no_pcj |>
  mutate(
    .fitted = beta_0 +
      beta_pcj * (as.numeric(pcj) - 1) +
      beta_gdppc * gdp_per_capita
  )

head(pred_no_pcj)
country_name pcj gdp_per_capita .fitted
1 Haiti No institutions 1182.498 57.39903
2 Haiti No institutions 1088.680 27.52235
3 Trinidad & Tobago No institutions 7742.736 2146.53873
4 Mexico No institutions 6894.704 1876.47876
5 Mexico No institutions 7780.053 2158.42248
6 Guatemala No institutions 3061.873 655.89514
# Predicted net FDI inflows for every observation with PCJ institutions.
# as.numeric(pcj) - 1 converts the factor codes (1, 2) into a 0/1 indicator.
pred_pcj <- ebj_pcj |>
  mutate(
    .fitted = beta_0 +
      beta_pcj * (as.numeric(pcj) - 1) +
      beta_gdppc * gdp_per_capita
  )

head(pred_pcj)
country_name pcj gdp_per_capita .fitted
1 Haiti PCJ institutions 1182.498 1730.687
2 Haiti PCJ institutions 1088.680 1700.811
3 Trinidad & Tobago PCJ institutions 7742.736 3819.827
4 Mexico PCJ institutions 6894.704 3549.767
5 Mexico PCJ institutions 7780.053 3831.711
6 Guatemala PCJ institutions 3061.873 2329.184
We now know our model’s predicted net FDI inflow for every country when it has and has not established a PCJ institution.
For example:
# Stack both counterfactual prediction sets and keep one country's rows.
pred_tt <- pred_no_pcj |>
  bind_rows(pred_pcj) |>
  filter(country_name == "Trinidad & Tobago")

pred_tt
country_name pcj gdp_per_capita .fitted
1 Trinidad & Tobago No institutions 7742.736 2146.539
2 Trinidad & Tobago PCJ institutions 7742.736 3819.827
What is the estimated effect for this country of having a PCJ institution?
# Row 2 minus row 1: this country's prediction with PCJ institutions minus
# its prediction without.
pred_tt |>
  mutate(diff = .fitted - lag(.fitted))
country_name pcj gdp_per_capita .fitted diff
1 Trinidad & Tobago No institutions 7742.736 2146.539 NA
2 Trinidad & Tobago PCJ institutions 7742.736 3819.827 1673.288
Next, find the average predicted net FDI inflow under each scenario:
# Average the predictions within each counterfactual PCJ scenario.
avg_effect <- pred_no_pcj |>
  bind_rows(pred_pcj) |>
  group_by(pcj) |>
  summarise(avg_fitted = mean(.fitted))

avg_effect
# A tibble: 2 × 2
pcj avg_fitted
<fct> <dbl>
1 No institutions 442.
2 PCJ institutions 2115.
Finally, find the difference between these average predictions:
# Row 2 minus row 1: average prediction with PCJ institutions minus the
# average prediction without.
avg_effect |>
  mutate(diff = avg_fitted - lag(avg_fitted))
# A tibble: 2 × 3
pcj avg_fitted diff
<fct> <dbl> <dbl>
1 No institutions 442. NA
2 PCJ institutions 2115. 1673.