This is the second survey, which consisted of the following questions: Which gender gets trolled by which other gender? Which gender gets more emotionally hurt? Which gender gets more angered by trolling?
## Parsed with column specification:
## cols(
## .default = col_character()
## )
## See spec(...) for full column specifications.
## # A tibble: 6 x 24
## StartDate EndDate Status IPAddress Progress `Duration (in s… Finished
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Start Date End Date Respon… IP Addre… Progress Duration (in se… Finished
## 2 "{\"Impor… "{\"Imp… "{\"Im… "{\"Impo… "{\"Imp… "{\"ImportId\":… "{\"Imp…
## 3 2018-04-1… 2018-04… 0 68.51.12… 100 17 1
## 4 2018-04-1… 2018-04… 0 146.227.… 100 85 1
## 5 2018-04-1… 2018-04… 0 86.150.3… 100 47 1
## 6 2018-04-1… 2018-04… 0 71.121.2… 100 12 1
## # ... with 17 more variables: RecordedDate <chr>, ResponseId <chr>,
## # RecipientLastName <chr>, RecipientFirstName <chr>,
## # RecipientEmail <chr>, ExternalReference <chr>, LocationLatitude <chr>,
## # LocationLongitude <chr>, DistributionChannel <chr>,
## # UserLanguage <chr>, Q1 <chr>, Q2_1 <chr>, Q2_2 <chr>, Q5 <chr>,
## # Q5_3_TEXT <chr>, Q3 <chr>, Q3_3_TEXT <chr>
## # A tibble: 6 x 25
## id StartDate EndDate Status IPAddress Progress `Duration (in se…
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1 2018-04-15… 2018-04-1… 0 68.51.12… 100 17
## 2 2 2018-04-15… 2018-04-1… 0 146.227.… 100 85
## 3 3 2018-04-15… 2018-04-1… 0 86.150.3… 100 47
## 4 4 2018-04-15… 2018-04-1… 0 71.121.2… 100 12
## 5 5 2018-04-15… 2018-04-1… 0 77.173.1… 100 16
## 6 6 2018-04-15… 2018-04-1… 0 71.197.2… 100 12
## # ... with 18 more variables: Finished <chr>, RecordedDate <chr>,
## # ResponseId <chr>, RecipientLastName <chr>, RecipientFirstName <chr>,
## # RecipientEmail <chr>, ExternalReference <chr>, LocationLatitude <chr>,
## # LocationLongitude <chr>, DistributionChannel <chr>,
## # UserLanguage <chr>, Q1 <chr>, Q2_1 <chr>, Q2_2 <chr>, Q5 <chr>,
## # Q5_3_TEXT <chr>, Q3 <chr>, Q3_3_TEXT <chr>
1 = Male, 2 = Female, 4 = Don’t remember, 5 = Never been trolled
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
# Cross-tabulate Q3 (rows) against Q5 (columns). No aggregation function is
# given, so dcast falls back to counting the rows in each Q3/Q5 cell.
mmff <- data %>%
  select(Q3, Q5) %>%
  dcast(Q3 ~ Q5)
## Using Q5 as value column: use value.var to override.
## Aggregation function missing: defaulting to length
# Show only the respondents whose Q5 answer is code "1" or "2".
data %>% filter(Q5 %in% c("1", "2"))
## # A tibble: 18 x 25
## id StartDate EndDate Status IPAddress Progress `Duration (in se…
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 2 2018-04-15… 2018-04-… 0 146.227.… 100 85
## 2 3 2018-04-15… 2018-04-… 0 86.150.3… 100 47
## 3 4 2018-04-15… 2018-04-… 0 71.121.2… 100 12
## 4 9 2018-04-15… 2018-04-… 0 83.250.8… 100 21
## 5 11 2018-04-15… 2018-04-… 0 104.2.24… 100 12
## 6 12 2018-04-15… 2018-04-… 0 85.210.1… 100 19
## 7 16 2018-04-17… 2018-04-… 0 71.71.11… 100 30
## 8 17 2018-04-17… 2018-04-… 0 106.210.… 100 33
## 9 21 2018-04-17… 2018-04-… 0 42.107.6… 100 34
## 10 25 2018-04-17… 2018-04-… 0 80.121.7… 100 25
## 11 30 2018-04-17… 2018-04-… 0 183.82.1… 100 48
## 12 31 2018-04-17… 2018-04-… 0 116.109.… 100 67
## 13 32 2018-04-17… 2018-04-… 0 27.97.30… 100 35
## 14 38 2018-04-17… 2018-04-… 0 49.33.75… 100 97
## 15 39 2018-04-17… 2018-04-… 8 49.33.75… 100 13
## 16 44 2018-04-17… 2018-04-… 0 49.32.16… 100 34
## 17 48 2018-04-17… 2018-04-… 0 151.66.2… 100 49
## 18 50 2018-04-18… 2018-04-… 0 106.216.… 100 58
## # ... with 18 more variables: Finished <chr>, RecordedDate <chr>,
## # ResponseId <chr>, RecipientLastName <chr>, RecipientFirstName <chr>,
## # RecipientEmail <chr>, ExternalReference <chr>, LocationLatitude <chr>,
## # LocationLongitude <chr>, DistributionChannel <chr>,
## # UserLanguage <chr>, Q1 <chr>, Q2_1 <chr>, Q2_2 <chr>, Q5 <chr>,
## # Q5_3_TEXT <chr>, Q3 <chr>, Q3_3_TEXT <chr>
# Display the Q3-by-Q5 count table.
print(mmff)
## Q3 1 2 4 5
## 1 1 10 1 3 15
## 2 2 4 3 1 13
# Q2_1 is read in as character: convert it to numeric, then derive emo_hurt
# as 6 - Q2_1, so a raw answer of 6 becomes 0 and lower raw answers score higher.
data <- data %>%
  mutate_at(vars(starts_with("Q2_1")), as.numeric) %>%
  mutate(emo_hurt = 6 - Q2_1)

# Show the derived score next to the raw answer as a sanity check.
data %>%
  select(emo_hurt, starts_with("Q2_1"))
## # A tibble: 50 x 2
## emo_hurt Q2_1
## <dbl> <dbl>
## 1 0 6.00
## 2 2.00 4.00
## 3 4.00 2.00
## 4 1.00 5.00
## 5 0 6.00
## 6 0 6.00
## 7 0 6.00
## 8 0 6.00
## 9 3.00 3.00
## 10 0 6.00
## # ... with 40 more rows
library(glue)
##
## Attaching package: 'glue'
## The following object is masked from 'package:dplyr':
##
## collapse
# Format a 95% confidence interval for the mean of `x` as "(lower, upper)".
#
# The interval is mean(x) +/- 1.96 * SE, where SE = sd(x) / sqrt(n) is the
# standard error of the mean (normal approximation, z-score = 1.96).
# Multiplying 1.96 by sd(x) alone would describe the spread of individual
# observations, not the uncertainty of the mean.
#
# x: numeric vector. Missing values are not removed, so an NA in `x`
#    propagates to both bounds.
# Returns a length-one character string with each bound formatted to
# 3 significant digits.
ci95 <- function(x) {
  half_width <- 1.96 * sd(x) / sqrt(length(x))
  lower <- format(mean(x) - half_width, digits = 3)
  upper <- format(mean(x) + half_width, digits = 3)
  sprintf("(%s, %s)", lower, upper)
}
# Overall mean, standard deviation and ci95() interval string for emo_hurt.
data %>%
  summarise(
    mean = mean(emo_hurt),
    SD = sd(emo_hurt),
    CI = ci95(emo_hurt)
  )
## # A tibble: 1 x 3
## mean SD CI
## <dbl> <dbl> <chr>
## 1 1.24 1.52 (-1.74, 4.22)
# Same emo_hurt descriptives, computed separately for each Q3 value.
desc <- data %>%
  group_by(Q3) %>%
  summarise(
    mean = mean(emo_hurt),
    SD = sd(emo_hurt),
    CI = ci95(emo_hurt)
  )
desc
## # A tibble: 2 x 4
## Q3 mean SD CI
## <chr> <dbl> <dbl> <chr>
## 1 1 1.31 1.51 (-1.66, 4.28)
## 2 2 1.14 1.56 (-1.91, 4.2)
# Split the emo_hurt scores by Q3. Q3 is a character column, so it is compared
# against string codes rather than relying on implicit number-to-string coercion.
males <- data %>%
  filter(Q3 == "1") %>%
  pull(emo_hurt)
females <- data %>%
  filter(Q3 == "2") %>%
  pull(emo_hurt)

# One emo_hurt histogram per Q3 group.
data %>%
  filter(Q3 == "1") %>%
  ggplot(aes(emo_hurt)) +
  geom_histogram(binwidth = 2)
data %>%
  filter(Q3 == "2") %>%
  ggplot(aes(emo_hurt)) +
  geom_histogram(binwidth = 2)

# Two-sample t-test with pooled variance. TRUE is spelled out because `T` is
# an ordinary variable that can be reassigned.
t.test(males, females, var.equal = TRUE)
##
## Two Sample t-test
##
## data: males and females
## t = 0.38134, df = 48, p-value = 0.7046
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.7155958 1.0505711
## sample estimates:
## mean of x mean of y
## 1.310345 1.142857
# Q2_2 is read in as character: convert it to numeric, then derive deg_anger
# as 6 - Q2_2, so a raw answer of 6 becomes 0 and lower raw answers score higher.
data <- data %>%
  mutate_at(vars(starts_with("Q2_2")), as.numeric) %>%
  mutate(deg_anger = 6 - Q2_2)

# Show the derived score next to the raw answer as a sanity check.
data %>%
  select(deg_anger, starts_with("Q2_2"))
## # A tibble: 50 x 2
## deg_anger Q2_2
## <dbl> <dbl>
## 1 0 6.00
## 2 2.00 4.00
## 3 3.00 3.00
## 4 1.00 5.00
## 5 0 6.00
## 6 0 6.00
## 7 0 6.00
## 8 0 6.00
## 9 4.00 2.00
## 10 0 6.00
## # ... with 40 more rows
library(glue)
# Format a 95% confidence interval for the mean of `x` as "(lower, upper)".
#
# The interval is mean(x) +/- 1.96 * SE, where SE = sd(x) / sqrt(n) is the
# standard error of the mean (normal approximation, z-score = 1.96).
# Multiplying 1.96 by sd(x) alone would describe the spread of individual
# observations, not the uncertainty of the mean.
#
# x: numeric vector. Missing values are not removed, so an NA in `x`
#    propagates to both bounds.
# Returns a length-one character string with each bound formatted to
# 3 significant digits.
ci95 <- function(x) {
  half_width <- 1.96 * sd(x) / sqrt(length(x))
  lower <- format(mean(x) - half_width, digits = 3)
  upper <- format(mean(x) + half_width, digits = 3)
  sprintf("(%s, %s)", lower, upper)
}
# Overall mean, standard deviation and ci95() interval string for deg_anger.
data %>%
  summarise(
    mean = mean(deg_anger),
    SD = sd(deg_anger),
    CI = ci95(deg_anger)
  )
## # A tibble: 1 x 3
## mean SD CI
## <dbl> <dbl> <chr>
## 1 1.40 1.71 (-1.96, 4.76)
# Same deg_anger descriptives, computed separately for each Q3 value.
desc2 <- data %>%
  group_by(Q3) %>%
  summarise(
    mean = mean(deg_anger),
    SD = sd(deg_anger),
    CI = ci95(deg_anger)
  )
desc2
## # A tibble: 2 x 4
## Q3 mean SD CI
## <chr> <dbl> <dbl> <chr>
## 1 1 1.48 1.68 (-1.81, 4.78)
## 2 2 1.29 1.79 (-2.23, 4.8)
# Split the deg_anger scores by Q3. Q3 is a character column, so it is compared
# against string codes rather than relying on implicit number-to-string coercion.
males <- data %>%
  filter(Q3 == "1") %>%
  pull(deg_anger)
females <- data %>%
  filter(Q3 == "2") %>%
  pull(deg_anger)

# One deg_anger histogram per Q3 group.
data %>%
  filter(Q3 == "1") %>%
  ggplot(aes(deg_anger)) +
  geom_histogram(binwidth = 2)
data %>%
  filter(Q3 == "2") %>%
  ggplot(aes(deg_anger)) +
  geom_histogram(binwidth = 2)

# Two-sample t-test with pooled variance. TRUE is spelled out because `T` is
# an ordinary variable that can be reassigned.
t.test(males, females, var.equal = TRUE)
##
## Two Sample t-test
##
## data: males and females
## t = 0.39769, df = 48, p-value = 0.6926
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.7991771 1.1932657
## sample estimates:
## mean of x mean of y
## 1.482759 1.285714