# Frequency table of the raw item V32; the recorded output follows.
table(my.df[["V32"]])
##
## 1 2 3 4 5 8 9
## 713 506 75 57 9 23 12
# Attribute list of V32 (variable label, Stata format, value labels).
attributes(my.df[["V32"]])
## $label
## [1] "Q6a Differences in income in <Rs country> are too large."
##
## $format.stata
## [1] "%12.0g"
##
## $labels
## Strongly agree Agree
## 1 2
## Neither agree nor disagree Disagree
## 3 4
## Strongly disagree Cant choose
## 5 8
## NA
## 9
library(dplyr)
# V32_m is a copy of V32 in which the codes 8 ("Cant choose") and 9 ("NA")
# are replaced by NA.
my.df <- mutate(my.df, V32_m = ifelse(V32 == 9 | V32 == 8, NA, V32))
table(my.df[["V32_m"]], useNA = "ifany")
##
## 1 2 3 4 5 <NA>
## 713 506 75 57 9 35
class(my.df[["V32_m"]])
## [1] "numeric"
# Stretch the 1..5 answer codes of V32_m to a 0..100 scale in steps of 25.
my.df <- mutate(my.df, incdiff100 = 25 * (V32_m - 1))
# Take the variable label over from the raw item V32.
attr(my.df$incdiff100, "label") <- attr(my.df$V32, "label", exact = TRUE)
# No value labels are attached yet.
names(attr(my.df$incdiff100, "labels", exact = TRUE))
## NULL
# Value labels for the rescaled codes.
attr(my.df$incdiff100, "labels") <- c(
  "Strongly agree" = 0,
  "Agree" = 25,
  "Neither agree nor disagree" = 50,
  "Disagree" = 75,
  "Strongly disagree" = 100
)
table(my.df[["incdiff100"]])
##
## 0 25 50 75 100
## 713 506 75 57 9
attributes(my.df[["incdiff100"]])
## $label
## [1] "Q6a Differences in income in <Rs country> are too large."
##
## $labels
## Strongly agree Agree
## 0 25
## Neither agree nor disagree Disagree
## 50 75
## Strongly disagree
## 100
class(my.df[["incdiff100"]])
## [1] "numeric"
# Raw distribution and attributes of V44 (own position on the top-bottom scale).
table(my.df[["V44"]])
##
## 1 2 3 4 5 6 7 8 9 10 99
## 12 27 98 120 277 413 284 113 14 4 33
attributes(my.df[["V44"]])
## $label
## [1] "Q10a <TOPBOT> Groups tending towards top and bottom. Where would you put yoursel"
##
## $format.stata
## [1] "%12.0g"
##
## $labels
## Bottom, Lowest, 01 02 03 04
## 1 2 3 4
## 05 06 07 08
## 5 6 7 8
## 09 Top, Highest, 10 Refused Dont know
## 9 10 97 98
## NA
## 99
# Raw distribution and attributes of V45 (position of the family of origin).
table(my.df[["V45"]])
##
## 1 2 3 4 5 6 7 8 9 10 99
## 29 72 138 178 264 296 206 112 35 12 53
attributes(my.df[["V45"]])
## $label
## [1] "Q10b Where did the family that you grew up in, fit in then?"
##
## $format.stata
## [1] "%12.0g"
##
## $labels
## Bottom, Lowest, 01 02 03 04
## 1 2 3 4
## 05 06 07 08
## 5 6 7 8
## 09 Top, Highest, 10 Dont know NA
## 9 10 98 99
# Copies of V44 / V45 with the non-substantive codes set to NA
# (97, 98, 99 for V44; 98, 99 for V45).
my.df <- mutate(
  my.df,
  V44_m = ifelse(V44 == 99 | V44 == 98 | V44 == 97, NA, V44),
  V45_m = ifelse(V45 == 99 | V45 == 98, NA, V45)
)
table(my.df[["V44_m"]], useNA = "ifany")
##
## 1 2 3 4 5 6 7 8 9 10 <NA>
## 12 27 98 120 277 413 284 113 14 4 33
table(my.df[["V45_m"]], useNA = "ifany")
##
## 1 2 3 4 5 6 7 8 9 10 <NA>
## 29 72 138 178 264 296 206 112 35 12 53
class(my.df[["V44_m"]])
## [1] "numeric"
class(my.df[["V45_m"]])
## [1] "numeric"
# Raw distributions and attributes of the five "getting ahead" items
# (V6, V7, V11, V12, V13); each call is followed by its recorded output.
table(my.df[["V6"]])
##
## 1 2 3 4 5 8 9
## 107 283 467 392 105 16 25
attributes(my.df[["V6"]])
## $label
## [1] "Q1a Getting ahead: How important is coming from a wealthy family?"
##
## $format.stata
## [1] "%12.0g"
##
## $labels
## Essential Very important Fairly important
## 1 2 3
## Not very important Not important at all Cant choose
## 4 5 8
## NA
## 9
table(my.df[["V7"]])
##
## 1 2 3 4 5 8 9
## 120 566 486 150 41 8 24
attributes(my.df[["V7"]])
## $label
## [1] "Q1b Getting ahead: How important is having well-educated parents?"
##
## $format.stata
## [1] "%12.0g"
##
## $labels
## Essential Very important Fairly important
## 1 2 3
## Not very important Not important at all Cant choose
## 4 5 8
## NA
## 9
table(my.df[["V11"]])
##
## 1 2 3 4 5 8 9
## 238 646 387 86 8 10 20
attributes(my.df[["V11"]])
## $label
## [1] "Q1f Getting ahead: How important is knowing the right people?"
##
## $format.stata
## [1] "%12.0g"
##
## $labels
## Essential Very important Fairly important
## 1 2 3
## Not very important Not important at all Cant choose
## 4 5 8
## NA
## 9
table(my.df[["V12"]])
##
## 1 2 3 4 5 8 9
## 35 154 375 596 168 53 14
attributes(my.df[["V12"]])
## $label
## [1] "Q1g Getting ahead: How important is having political connections?"
##
## $format.stata
## [1] "%12.0g"
##
## $labels
## Essential Very important Fairly important
## 1 2 3
## Not very important Not important at all Cant choose
## 4 5 8
## NA
## 9
table(my.df[["V13"]])
##
## 1 2 3 4 5 8 9
## 21 35 80 271 876 96 16
attributes(my.df[["V13"]])
## $label
## [1] "Q1h Getting ahead: How important is giving bribes?"
##
## $format.stata
## [1] "%12.0g"
##
## $labels
## Essential Very important Fairly important
## 1 2 3
## Not very important Not important at all Cant choose
## 4 5 8
## NA
## 9
# For each of the five items create a "<name>_m" copy in which the codes
# 8 and 9 are replaced by NA.
my.df <- mutate(
  my.df,
  across(
    c(V6, V7, V11, V12, V13),
    ~ ifelse(.x == 8 | .x == 9, NA, .x),
    .names = "{.col}_m"
  )
)
table(my.df[["V6_m"]], useNA = "ifany")
##
## 1 2 3 4 5 <NA>
## 107 283 467 392 105 41
table(my.df[["V7_m"]], useNA = "ifany")
##
## 1 2 3 4 5 <NA>
## 120 566 486 150 41 32
table(my.df[["V11_m"]], useNA = "ifany")
##
## 1 2 3 4 5 <NA>
## 238 646 387 86 8 30
table(my.df[["V12_m"]], useNA = "ifany")
##
## 1 2 3 4 5 <NA>
## 35 154 375 596 168 67
table(my.df[["V13_m"]], useNA = "ifany")
##
## 1 2 3 4 5 <NA>
## 21 35 80 271 876 112
# Value labels shared by the five recoded items.
# NOTE(review): these labels run in the opposite direction to the raw value
# labels (raw: Essential = 1 ... Not important at all = 5), while the *_m
# columns still hold the raw codes at this point. They only agree with the
# codes after the later reversal into the *_mr columns -- confirm this is
# intended.
label.vc <- c(
  "Not important at all" = 1,
  "Not very important" = 2,
  "Fairly important" = 3,
  "Very important" = 4,
  "Essential" = 5
)
# Per item: attach the value labels, then copy the variable label of the
# corresponding raw item.
attr(my.df$V6_m, "labels") <- label.vc
attr(my.df$V6_m, "label") <- attr(my.df$V6, "label", exact = TRUE)
attr(my.df$V7_m, "labels") <- label.vc
attr(my.df$V7_m, "label") <- attr(my.df$V7, "label", exact = TRUE)
attr(my.df$V11_m, "labels") <- label.vc
attr(my.df$V11_m, "label") <- attr(my.df$V11, "label", exact = TRUE)
attr(my.df$V12_m, "labels") <- label.vc
attr(my.df$V12_m, "label") <- attr(my.df$V12, "label", exact = TRUE)
attr(my.df$V13_m, "labels") <- label.vc
attr(my.df$V13_m, "label") <- attr(my.df$V13, "label", exact = TRUE)
# Class and attributes of each recoded item after labelling.
class(my.df[["V6_m"]])
## [1] "numeric"
attributes(my.df[["V6_m"]])
## $labels
## Not important at all Not very important Fairly important
## 1 2 3
## Very important Essential
## 4 5
##
## $label
## [1] "Q1a Getting ahead: How important is coming from a wealthy family?"
class(my.df[["V7_m"]])
## [1] "numeric"
attributes(my.df[["V7_m"]])
## $labels
## Not important at all Not very important Fairly important
## 1 2 3
## Very important Essential
## 4 5
##
## $label
## [1] "Q1b Getting ahead: How important is having well-educated parents?"
class(my.df[["V11_m"]])
## [1] "numeric"
attributes(my.df[["V11_m"]])
## $labels
## Not important at all Not very important Fairly important
## 1 2 3
## Very important Essential
## 4 5
##
## $label
## [1] "Q1f Getting ahead: How important is knowing the right people?"
class(my.df[["V12_m"]])
## [1] "numeric"
attributes(my.df[["V12_m"]])
## $labels
## Not important at all Not very important Fairly important
## 1 2 3
## Very important Essential
## 4 5
##
## $label
## [1] "Q1g Getting ahead: How important is having political connections?"
class(my.df[["V13_m"]])
## [1] "numeric"
attributes(my.df[["V13_m"]])
## $labels
## Not important at all Not very important Fairly important
## 1 2 3
## Very important Essential
## 4 5
##
## $label
## [1] "Q1h Getting ahead: How important is giving bribes?"
# Build the index in three steps inside one mutate() call:
#   1. *_mr     : reverse the 1..5 coding (1 <-> 5, 2 <-> 4),
#   2. *_mr_100 : stretch the reversed codes to 0..100,
#   3. p_p_justice : mean of the five rescaled items (NA if any item is NA).
my.df <- mutate(
  my.df,
  V6_mr = 6 - V6_m,
  V7_mr = 6 - V7_m,
  V11_mr = 6 - V11_m,
  V12_mr = 6 - V12_m,
  V13_mr = 6 - V13_m,
  V6_mr_100 = 25 * (V6_mr - 1),
  V7_mr_100 = 25 * (V7_mr - 1),
  V11_mr_100 = 25 * (V11_mr - 1),
  V12_mr_100 = 25 * (V12_mr - 1),
  V13_mr_100 = 25 * (V13_mr - 1),
  p_p_justice = (V6_mr_100 + V7_mr_100 + V11_mr_100 + V12_mr_100 + V13_mr_100) / 5
)
# Labels for the two end points of the index.
label_p_p_j.vc <- c(
  "highest perception of procedural justice" = 0,
  "lowest perception of procedural justice" = 100
)
# Attach the variable label and the end-point labels to the index.
attr(my.df$p_p_justice, "label") <- "Index from 0 to 100 to represent the level of percieved procedural justice"
attr(my.df$p_p_justice, "labels") <- label_p_p_j.vc
table(my.df[["p_p_justice"]], useNA = "ifany")
##
## 0 5 10 15 20 25 30 35 40 45 50 55 60 65 70 75
## 2 1 9 12 32 68 119 170 170 142 146 114 69 52 40 28
## 80 85 90 95 100 <NA>
## 17 8 4 4 6 182
class(my.df[["p_p_justice"]])
## [1] "numeric"
attributes(my.df[["p_p_justice"]])
## $labels
## highest perception of procedural justice
## 0
## lowest perception of procedural justice
## 100
##
## $label
## [1] "Index from 0 to 100 to represent the level of percieved procedural justice"
# Attributes and raw distribution of V54 (type-of-society diagram).
attributes(my.df[["V54"]])
## $label
## [1] "Q14a Type of society: What type of society is <Rs country> today - which diagram"
##
## $format.stata
## [1] "%12.0g"
##
## $labels
## Type A Type B Type C Type D Type E Cant choose
## 1 2 3 4 5 8
## NA
## 9
table(my.df[["V54"]])
##
## 1 2 3 4 5 8
## 236 444 289 233 53 140
# V54_m: copy of V54 with the codes 8 and 9 replaced by NA.
my.df <- mutate(my.df, V54_m = ifelse(V54 == 9 | V54 == 8, NA, V54))
table(my.df[["V54_m"]], useNA = "ifany")
##
## 1 2 3 4 5 <NA>
## 236 444 289 233 53 140
class(my.df[["V54_m"]])
## [1] "numeric"
# Dummy for the chosen society diagram: 1 if V54_m is 4 ("Type D"),
# 0 for the types 1, 2, 3 and 5; NA in V54_m stays NA.
# The previous version passed the "4 -> 1" recode as a second, unnamed
# argument to mutate(), so it never reached middleclass_soc: the column was
# coded 0/4 and an extra column named after the ifelse() expression was added.
# NOTE(review): model output recorded further down was produced with the
# 0/4 coding and has to be re-run after this change.
my.df <- my.df %>% mutate(middleclass_soc = ifelse(V54_m == 4, 1, 0))
table(my.df$middleclass_soc, useNA = "ifany")
##
## 0 1 <NA>
## 1022 233 140
# Attributes and raw distribution of SEX.
attributes(my.df[["SEX"]])
## $label
## [1] "R: Sex"
##
## $format.stata
## [1] "%12.0g"
##
## $labels
## Male Female NA, refused
## 1 2 9
table(my.df[["SEX"]])
##
## 1 2
## 703 692
# SEX_m: copy of SEX with code 9 ("NA, refused") replaced by NA.
my.df <- mutate(my.df, SEX_m = ifelse(SEX == 9, NA, SEX))
table(my.df[["SEX_m"]], useNA = "ifany")
##
## 1 2
## 703 692
# Attribute list of AGE (variable label, Stata format, value labels).
attributes(my.df[["AGE"]])
## $label
## [1] "R: Age"
##
## $format.stata
## [1] "%12.0g"
##
## $labels
## 15 16 17 18
## 15 16 17 18
## 19 20 21 22
## 19 20 21 22
## 23 24 25 26
## 23 24 25 26
## 27 28 29 30
## 27 28 29 30
## 31 32 33 34
## 31 32 33 34
## 35 36 37 38
## 35 36 37 38
## 39 40 41 42
## 39 40 41 42
## 43 44 45 46
## 43 44 45 46
## 47 48 49 50
## 47 48 49 50
## 51 52 53 54
## 51 52 53 54
## 55 56 57 58
## 55 56 57 58
## 59 60 61 62
## 59 60 61 62
## 63 64 65 66
## 63 64 65 66
## 67 68 69 70
## 67 68 69 70
## 71 72 73 74
## 71 72 73 74
## 75 76 77 78
## 75 76 77 78
## 79 80 81 82
## 79 80 81 82
## 83 84 85 86
## 83 84 85 86
## 87 88 89 90
## 87 88 89 90
## 91 92 93 94
## 91 92 93 94
## 95 96 97 98 years or more
## 95 96 97 98
## NA, refused
## 99
# Raw distribution of AGE.
table(my.df$AGE)
##
## 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
## 6 21 25 24 22 20 22 16 21 19 19 21 24 21 15 11 19 20 18 11 24 23 31 26 32 32
## 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
## 30 37 14 28 24 45 24 22 21 32 22 23 23 24 20 30 14 15 18 31 15 14 33 15 31 21
## 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 94 97 98 99
## 17 19 14 18 20 21 14 11 12 6 13 8 8 7 8 8 2 4 1 3 1 1 1 1 1 7
# AGE_m: copy of AGE with the missing code replaced by NA.
# Only 99 ("NA, refused") is a missing code. 98 is the valid top category
# "98 years or more" in the value labels, so that respondent is kept instead
# of being dropped as missing.
# NOTE(review): results recorded further down (listwise n, model output) were
# produced with 98 set to NA and may change slightly when re-run.
my.df <- my.df %>% mutate(AGE_m = ifelse(AGE == 99, NA, AGE))
table(my.df$AGE_m, useNA = "ifany")
##
## 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
## 6 21 25 24 22 20 22 16 21 19 19 21 24 21 15 11
## 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
## 19 20 18 11 24 23 31 26 32 32 30 37 14 28 24 45
## 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65
## 24 22 21 32 22 23 23 24 20 30 14 15 18 31 15 14
## 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
## 33 15 31 21 17 19 14 18 20 21 14 11 12 6 13 8
## 82 83 84 85 86 87 88 89 90 91 94 97 98 <NA>
## 8 7 8 8 2 4 1 3 1 1 1 1 1 7
# Raw distribution of DE_DEGR, including missing values if present.
table(my.df[["DE_DEGR"]], useNA = "ifany")
##
## 1 2 3 4 5 6 7 8 9 99
## 9 20 456 434 64 138 77 187 8 2
# DE_DEGR_m: copy of DE_DEGR with the codes 1, 9 and 99 replaced by NA.
my.df <- mutate(
  my.df,
  DE_DEGR_m = ifelse(DE_DEGR == 1 | DE_DEGR == 9 | DE_DEGR == 99, NA, DE_DEGR)
)
# Attributes and raw distribution of V66 (self-assessed social class).
attributes(my.df[["V66"]])
## $label
## [1] "Q20 Which social class would you say you belong to?"
##
## $format.stata
## [1] "%12.0g"
##
## $labels
## NAV (AT,FR,PH) Lower class Working class Lower middle class
## 0 1 2 3
## Middle class Upper middle class Upper class Dont know
## 4 5 6 8
## NA
## 9
table(my.df[["V66"]])
##
## 1 2 4 5 6 8 9
## 44 387 786 146 4 17 11
# V66_m: copy of V66 with the codes 0, 8 and 9 replaced by NA.
my.df <- mutate(my.df, V66_m = ifelse(V66 == 9 | V66 == 8 | V66 == 0, NA, V66))
table(my.df[["V66_m"]], useNA = "ifany")
##
## 1 2 4 5 6 <NA>
## 44 387 786 146 4 28
class(my.df[["V66_m"]])
## [1] "numeric"
# Distribution of SEX, without and with an NA column, then its attributes.
table(my.df[["SEX"]])
##
## 1 2
## 703 692
table(my.df[["SEX"]], useNA = "ifany")
##
## 1 2
## 703 692
attributes(my.df[["SEX"]])
## $label
## [1] "R: Sex"
##
## $format.stata
## [1] "%12.0g"
##
## $labels
## Male Female NA, refused
## 1 2 9
# Female dummy: 1 for SEX == 2 (Female), 0 for SEX == 1 (Male), NA otherwise.
# The previous `SEX != 2` test would have coded the "NA, refused" code (9)
# as 0, and it assigned into a column that had not been created yet.
# For the data shown (only codes 1 and 2 occur) the result is unchanged.
my.df <- my.df %>% mutate(female = ifelse(SEX == 2, 1, ifelse(SEX == 1, 0, NA)))
table(my.df$female)
##
## 0 1
## 703 692
## Idea: rescale the variable into a 1-to-5 variable. However, the scale Larsen uses
## was provided directly by ISSP and is not available to me, and
## a 1-to-5 rescaling does not make sense using DE_DEGR
# table(my.df$DE_DEGR_m, useNA = "ifany")
# attributes(my.df$DE_DEGR_m)$labels
# my.df$educ5 <- NA
# my.df$educ5[my.df$DE_DEGR_m == 2] <- "No certificate"
# my.df$educ5[my.df$DE_DEGR_m == 3 | my.df$DE_DEGR_m == 4] <- "Primary"
# my.df$educ5[my.df$DE_DEGR_m == 5 | my.df$DE_DEGR_m == 6] <- "Secondary"
# my.df$educ5[my.df$DE_DEGR_m == 7] <- "Special Tertiary"
# my.df$educ5[my.df$DE_DEGR_m == 8] <- "Tertiary"
# my.df$educ5 <- factor(my.df$educ5, levels = c("Secondary", "No certificate", "Primary", # "Special Tertiary", "Tertiary"),
# labels = c("Secondary", "No certificate", "Primary", "Special Tertiary", "Tertiary"))
# class(my.df$educ5)
# table(my.df$educ5, useNA = "ifany")
# levels(my.df$educ5)
# Raw distribution of DE_DEGR, including missing values if present.
table(my.df[["DE_DEGR"]], useNA = "ifany")
##
## 1 2 3 4 5 6 7 8 9 99
## 9 20 456 434 64 138 77 187 8 2
# DE_DEGR_m: copy of DE_DEGR with the codes 1, 9 and 99 replaced by NA.
my.df <- mutate(
  my.df,
  DE_DEGR_m = ifelse(DE_DEGR == 1 | DE_DEGR == 9 | DE_DEGR == 99, NA, DE_DEGR)
)
table(my.df[["DE_DEGR_m"]], useNA = "ifany")
##
## 2 3 4 5 6 7 8 <NA>
## 20 456 434 64 138 77 187 19
# The recoded column carries no value labels.
attr(my.df$DE_DEGR_m, "labels", exact = TRUE)
## NULL
# Three-level education factor derived from DE_DEGR_m:
#   2, 3, 4 -> "Primary"; 5, 6 -> "Secondary"; 7, 8 -> "Tertiary";
#   everything else (including NA) -> NA.
# "Secondary" is the first level and therefore the reference category.
my.df <- my.df %>%
  mutate(
    educ3 = factor(
      case_when(
        DE_DEGR_m %in% c(2, 3, 4) ~ "Primary",
        DE_DEGR_m %in% c(5, 6) ~ "Secondary",
        DE_DEGR_m %in% c(7, 8) ~ "Tertiary"
      ),
      levels = c("Secondary", "Primary", "Tertiary")
    )
  )
class(my.df[["educ3"]])
## [1] "factor"
table(my.df[["educ3"]], useNA = "ifany")
##
## Secondary Primary Tertiary <NA>
## 202 910 264 19
levels(my.df[["educ3"]])
## [1] "Secondary" "Primary" "Tertiary"
# Names of all columns that enter the regression model
# (dependent variable first).
varsinmodel.vc <- c(
  "incdiff100", "social_mob", "p_p_justice", "middleclass_soc",
  "female", "AGE_m", "educ3", "V66_m"
)
# Keep only the model variables.
my.df.filtered <- my.df[, varsinmodel.vc]
View(my.df.filtered)
nrow(my.df.filtered)
## [1] 1395
names(my.df.filtered)
## [1] "incdiff100" "social_mob" "p_p_justice" "middleclass_soc"
## [5] "female" "AGE_m" "educ3" "V66_m"
# Listwise deletion: drop every row with at least one NA.
my.df.listw <- na.omit(my.df.filtered)
View(my.df.listw)
nrow(my.df.listw)
## [1] 1020
# Linear model of incdiff100 on all model variables, fitted on the
# listwise-deleted data; the fitted model is printed afterwards.
m.lw <- lm(
  incdiff100 ~ social_mob + p_p_justice + middleclass_soc + female + AGE_m +
    educ3 + V66_m,
  data = my.df.listw
)
print(m.lw)
##
## Call:
## lm(formula = incdiff100 ~ social_mob + p_p_justice + middleclass_soc +
## female + AGE_m + educ3 + V66_m, data = my.df.listw)
##
## Coefficients:
## (Intercept) social_mob p_p_justice middleclass_soc
## 20.0448 0.2688 -0.1649 2.0589
## female AGE_m educ3Primary educ3Tertiary
## -1.6800 -0.0860 -6.3768 1.2449
## V66_m
## 3.2082
library(stargazer)
# Regression table for m.lw with readable covariate labels; only the number
# of observations and R-squared are kept as summary statistics.
stargazer(
  m.lw,
  title = "",
  keep.stat = c("n", "rsq"),
  covariate.labels = c(
    "Respondents social mobility",
    "Respondents perceived procedural justice",
    "Middle class society",
    "Respondents Gender: Female",
    "Respondents Age",
    "Education: Primary",
    "Education: Tertiary",
    "Respondents perceived social class",
    "Intercept"
  )
)
% Table created by stargazer v.5.2.2 by Marek Hlavac, Harvard University. E-mail: hlavac at fas.harvard.edu % Date and time: Di, Sep 06, 2022 - 11:27:10
The model tries to explain the dependent variable (acceptance of income inequality, scaled 0 to 100) using the respondent's social mobility, perceived procedural justice, whether the respondent identifies Germany as a middle-class society, gender, age, education and perceived social class.
The model includes 1020 observations; incomplete observations were removed by listwise deletion. R-squared equals 0.149, which means that 14.9% of the variance of the dependent variable can be explained using this data, model and variable selection. 14.9% is a relatively low figure, but the questionnaire is highly subjective. For example, the questions regarding perceived procedural justice/injustice and perceived social class already point towards the subjectivity of some questions. This, together with the fact that this is a linear model in the social sciences, could explain the low explained variance.
With every point of upward mobility of the respondent, the acceptance of income inequality rises (moves towards 100) by 0.269 points. In the extremes this means that a respondent with total upward mobility (family class 1, respondent class 10) would gain 2.69 points. This is rather negligible, because on a 0 to 100 scale 2.69 points are very small and the effect is not significant at the 0.05 level. The next independent variable is also scaled from 0 (highest perception of procedural justice) to 100 (lowest perception of procedural justice). The higher the perceived procedural injustice, the lower the respondent's acceptance of income inequalities. The effect is significant: with a 1 point increase on the perceived procedural justice variable, the respondent scores 0.165 points lower on the acceptance of income difference scale. In the extreme case this means the respondent scores up to 16.5 points lower on the acceptance of income inequalities scale.
If the respondent thinks Germany is a middle-class society, the respondent scores 2.059 points higher on the acceptance of income inequalities scale. This effect is significant. If the respondent is female, the respondent scores 1.68 points lower on the acceptance of income inequalities scale; however, the effect is not statistically significant. For every year of age the respondent scores 0.086 points lower on the acceptance of inequalities scale. This effect is statistically significant, but rather small.
The education variable is split into three categories: secondary (reference category), primary and tertiary. The label stands for the highest completed education of the respondent. If the respondent has only completed primary education (or quit school), the respondent scores on average 6.377 points lower than respondents who have completed secondary education. If the respondent has completed tertiary education, the score on the acceptance of income inequalities scale is on average 1.245 points higher than that of their counterparts who completed only secondary education. This means that the higher the education, the higher the acceptance of income inequality. However, only the effect of primary education is statistically significant.
The last independent variable describes the respondent's perceived social class. The higher the perceived social class of the respondent (1 to 6 scale, 1 = Lower class, 6 = Upper class), the higher their acceptance of income inequalities. For every additional point on the 1 to 6 scale (a higher class) the respondent scores on average 3.208 points higher on the acceptance of income inequalities scale. The effect is statistically significant.
The y-axis intercept would only be relevant to interpret if all the independent variables could equal 0 at the same time. That is highly unlikely, as the respondent's social mobility and social class never equal 0, so I will leave the y-axis intercept out of the interpretation.
# Slope coefficients of m.lw (intercept dropped) and their names.
coef(m.lw)[-1]
## social_mob p_p_justice middleclass_soc female AGE_m
## 0.26877357 -0.16488036 2.05890888 -1.67999885 -0.08599975
## educ3Primary educ3Tertiary V66_m
## -6.37684709 1.24491289 3.20820616
names(coef(m.lw)[-1])
## [1] "social_mob" "p_p_justice" "middleclass_soc" "female"
## [5] "AGE_m" "educ3Primary" "educ3Tertiary" "V66_m"
# First version of the plotting frame, keyed by the raw coefficient names;
# it is replaced below by the version with readable labels.
m.df <- data.frame(
  varname = names(coef(m.lw)[-1]),
  bvalue = coef(m.lw)[-1]
)
# Readable labels, in the same order as the slope coefficients.
variable_labels.vc <- c(
  "Respondents social mobility",
  "Respondents perceived procedural justice",
  "Middle class society",
  "Respondents Gender: Female",
  "Respondents Age",
  "Education: Primary",
  "Education: Tertiary",
  "Respondents perceived social class"
)
# Final plotting frame: one row per slope with its label and estimate.
m.df <- data.frame(
  varname = variable_labels.vc,
  bvalue = coef(m.lw)[-1]
)
library(ggplot2)
# Dot plot of the slope estimates: estimate on the x axis, variable label on
# the y axis; the constant colour string creates a legend entry for the model.
ggplot() +
  geom_point(
    mapping = aes(x = bvalue, y = varname, color = "Full linear Model"),
    data = m.df
  )