Sanchit Soni

This is the second survey, which asked the following questions: Which gender gets trolled by which other gender? Which gender gets more emotionally hurt? Which gender gets more angered by trolling?

## Parsed with column specification:
## cols(
##   .default = col_character()
## )
## See spec(...) for full column specifications.
## # A tibble: 6 x 24
##   StartDate  EndDate  Status  IPAddress Progress `Duration (in s… Finished
##   <chr>      <chr>    <chr>   <chr>     <chr>    <chr>            <chr>   
## 1 Start Date End Date Respon… IP Addre… Progress Duration (in se… Finished
## 2 "{\"Impor… "{\"Imp… "{\"Im… "{\"Impo… "{\"Imp… "{\"ImportId\":… "{\"Imp…
## 3 2018-04-1… 2018-04… 0       68.51.12… 100      17               1       
## 4 2018-04-1… 2018-04… 0       146.227.… 100      85               1       
## 5 2018-04-1… 2018-04… 0       86.150.3… 100      47               1       
## 6 2018-04-1… 2018-04… 0       71.121.2… 100      12               1       
## # ... with 17 more variables: RecordedDate <chr>, ResponseId <chr>,
## #   RecipientLastName <chr>, RecipientFirstName <chr>,
## #   RecipientEmail <chr>, ExternalReference <chr>, LocationLatitude <chr>,
## #   LocationLongitude <chr>, DistributionChannel <chr>,
## #   UserLanguage <chr>, Q1 <chr>, Q2_1 <chr>, Q2_2 <chr>, Q5 <chr>,
## #   Q5_3_TEXT <chr>, Q3 <chr>, Q3_3_TEXT <chr>
## # A tibble: 6 x 25
##   id    StartDate   EndDate    Status IPAddress Progress `Duration (in se…
##   <chr> <chr>       <chr>      <chr>  <chr>     <chr>    <chr>            
## 1 1     2018-04-15… 2018-04-1… 0      68.51.12… 100      17               
## 2 2     2018-04-15… 2018-04-1… 0      146.227.… 100      85               
## 3 3     2018-04-15… 2018-04-1… 0      86.150.3… 100      47               
## 4 4     2018-04-15… 2018-04-1… 0      71.121.2… 100      12               
## 5 5     2018-04-15… 2018-04-1… 0      77.173.1… 100      16               
## 6 6     2018-04-15… 2018-04-1… 0      71.197.2… 100      12               
## # ... with 18 more variables: Finished <chr>, RecordedDate <chr>,
## #   ResponseId <chr>, RecipientLastName <chr>, RecipientFirstName <chr>,
## #   RecipientEmail <chr>, ExternalReference <chr>, LocationLatitude <chr>,
## #   LocationLongitude <chr>, DistributionChannel <chr>,
## #   UserLanguage <chr>, Q1 <chr>, Q2_1 <chr>, Q2_2 <chr>, Q5 <chr>,
## #   Q5_3_TEXT <chr>, Q3 <chr>, Q3_3_TEXT <chr>

Which gender gets trolled by which other gender?

Response codes: 1 = Male, 2 = Female, 4 = Don’t remember, 5 = Never been trolled

library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
# Cross-tabulate Q3 (rows) against Q5 (columns). No aggregation function is
# supplied, so dcast falls back to counting the rows in each cell.
mmff <- data %>%
  select(Q3, Q5) %>%
  dcast(Q3 ~ Q5)
## Using Q5 as value column: use value.var to override.
## Aggregation function missing: defaulting to length
# Show only the responses whose Q5 answer is "1" or "2".
data %>%
  filter(Q5 %in% c("1", "2"))
## # A tibble: 18 x 25
##    id    StartDate   EndDate   Status IPAddress Progress `Duration (in se…
##    <chr> <chr>       <chr>     <chr>  <chr>     <chr>    <chr>            
##  1 2     2018-04-15… 2018-04-… 0      146.227.… 100      85               
##  2 3     2018-04-15… 2018-04-… 0      86.150.3… 100      47               
##  3 4     2018-04-15… 2018-04-… 0      71.121.2… 100      12               
##  4 9     2018-04-15… 2018-04-… 0      83.250.8… 100      21               
##  5 11    2018-04-15… 2018-04-… 0      104.2.24… 100      12               
##  6 12    2018-04-15… 2018-04-… 0      85.210.1… 100      19               
##  7 16    2018-04-17… 2018-04-… 0      71.71.11… 100      30               
##  8 17    2018-04-17… 2018-04-… 0      106.210.… 100      33               
##  9 21    2018-04-17… 2018-04-… 0      42.107.6… 100      34               
## 10 25    2018-04-17… 2018-04-… 0      80.121.7… 100      25               
## 11 30    2018-04-17… 2018-04-… 0      183.82.1… 100      48               
## 12 31    2018-04-17… 2018-04-… 0      116.109.… 100      67               
## 13 32    2018-04-17… 2018-04-… 0      27.97.30… 100      35               
## 14 38    2018-04-17… 2018-04-… 0      49.33.75… 100      97               
## 15 39    2018-04-17… 2018-04-… 8      49.33.75… 100      13               
## 16 44    2018-04-17… 2018-04-… 0      49.32.16… 100      34               
## 17 48    2018-04-17… 2018-04-… 0      151.66.2… 100      49               
## 18 50    2018-04-18… 2018-04-… 0      106.216.… 100      58               
## # ... with 18 more variables: Finished <chr>, RecordedDate <chr>,
## #   ResponseId <chr>, RecipientLastName <chr>, RecipientFirstName <chr>,
## #   RecipientEmail <chr>, ExternalReference <chr>, LocationLatitude <chr>,
## #   LocationLongitude <chr>, DistributionChannel <chr>,
## #   UserLanguage <chr>, Q1 <chr>, Q2_1 <chr>, Q2_2 <chr>, Q5 <chr>,
## #   Q5_3_TEXT <chr>, Q3 <chr>, Q3_3_TEXT <chr>
# Display the Q3-by-Q5 count table.
print(mmff)
##   Q3  1 2 4  5
## 1  1 10 1 3 15
## 2  2  4 3 1 13

Which gender gets more emotionally hurt?

# Convert the Q2_1 response to numeric, then derive emo_hurt as 6 - Q2_1,
# so an answer of 6 maps to 0 and lower answers map to higher scores.
data <- data %>%
  mutate_at(vars(starts_with("Q2_1")), as.numeric) %>%
  mutate(emo_hurt = 6 - Q2_1)

# Print the derived score next to the raw response as a sanity check.
data %>%
  select(emo_hurt, starts_with("Q2_1"))
## # A tibble: 50 x 2
##    emo_hurt  Q2_1
##       <dbl> <dbl>
##  1     0     6.00
##  2     2.00  4.00
##  3     4.00  2.00
##  4     1.00  5.00
##  5     0     6.00
##  6     0     6.00
##  7     0     6.00
##  8     0     6.00
##  9     3.00  3.00
## 10     0     6.00
## # ... with 40 more rows
library(glue)
## 
## Attaching package: 'glue'
## The following object is masked from 'package:dplyr':
## 
##     collapse
# Format a 95% confidence interval for the mean of `x` as "(lower, upper)".
#
# The interval is mean(x) +/- 1.96 * standard error, where the standard
# error is sd(x) / sqrt(n) (normal approximation, z = 1.96). Using sd(x)
# alone would describe the spread of individual scores rather than the
# uncertainty of the mean.
#
# x: numeric vector; missing values are not removed and propagate to the
#    result.
# Returns a length-one character string with each bound shown to 3
# significant digits.
ci95 <- function(x) {
  half_width <- 1.96 * sd(x) / sqrt(length(x))
  bounds <- mean(x) + c(-1, 1) * half_width
  sprintf(
    "(%s, %s)",
    format(bounds[1], digits = 3),
    format(bounds[2], digits = 3)
  )
}
# Overall emotional-hurt summary: mean, standard deviation and the
# interval string produced by ci95().
summarise(
  data,
  mean = mean(emo_hurt),
  SD = sd(emo_hurt),
  CI = ci95(emo_hurt)
)
## # A tibble: 1 x 3
##    mean    SD CI           
##   <dbl> <dbl> <chr>        
## 1  1.24  1.52 (-1.74, 4.22)

I am interested in whether there is a difference in emotional hurt scores between females and males.

# Emotional-hurt summary statistics computed separately for each Q3 group.
desc <- data %>%
  group_by(Q3) %>%
  summarise(
    mean = mean(emo_hurt),
    SD = sd(emo_hurt),
    CI = ci95(emo_hurt)
  )
desc
## # A tibble: 2 x 4
##   Q3     mean    SD CI           
##   <chr> <dbl> <dbl> <chr>        
## 1 1      1.31  1.51 (-1.66, 4.28)
## 2 2      1.14  1.56 (-1.91, 4.2)
# Emotional-hurt scores split by Q3 (the surrounding text codes 1 = male,
# 2 = female). Q3 is stored as character, so it is compared against string
# literals instead of relying on implicit number-to-string coercion.
males <- data %>%
  filter(Q3 == "1") %>%
  pull(emo_hurt)
females <- data %>%
  filter(Q3 == "2") %>%
  pull(emo_hurt)

# One histogram per group to eyeball the score distributions.
data %>%
  filter(Q3 == "1") %>%
  ggplot(aes(emo_hurt)) +
  geom_histogram(binwidth = 2)

data %>%
  filter(Q3 == "2") %>%
  ggplot(aes(emo_hurt)) +
  geom_histogram(binwidth = 2)

# Pooled-variance two-sample t-test. TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned.
t.test(males, females, var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  males and females
## t = 0.38134, df = 48, p-value = 0.7046
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.7155958  1.0505711
## sample estimates:
## mean of x mean of y 
##  1.310345  1.142857

Which gender gets more angered by trolling?

# Convert the Q2_2 response to numeric, then derive deg_anger as 6 - Q2_2,
# so an answer of 6 maps to 0 and lower answers map to higher scores.
data <- data %>%
  mutate_at(vars(starts_with("Q2_2")), as.numeric) %>%
  mutate(deg_anger = 6 - Q2_2)

# Print the derived score next to the raw response as a sanity check.
data %>%
  select(deg_anger, starts_with("Q2_2"))
## # A tibble: 50 x 2
##    deg_anger  Q2_2
##        <dbl> <dbl>
##  1      0     6.00
##  2      2.00  4.00
##  3      3.00  3.00
##  4      1.00  5.00
##  5      0     6.00
##  6      0     6.00
##  7      0     6.00
##  8      0     6.00
##  9      4.00  2.00
## 10      0     6.00
## # ... with 40 more rows
library(glue)
# Format a 95% confidence interval for the mean of `x` as "(lower, upper)".
#
# The interval is mean(x) +/- 1.96 * standard error, where the standard
# error is sd(x) / sqrt(n) (normal approximation, z = 1.96). Using sd(x)
# alone would describe the spread of individual scores rather than the
# uncertainty of the mean.
#
# x: numeric vector; missing values are not removed and propagate to the
#    result.
# Returns a length-one character string with each bound shown to 3
# significant digits.
ci95 <- function(x) {
  half_width <- 1.96 * sd(x) / sqrt(length(x))
  bounds <- mean(x) + c(-1, 1) * half_width
  sprintf(
    "(%s, %s)",
    format(bounds[1], digits = 3),
    format(bounds[2], digits = 3)
  )
}
# Overall anger summary: mean, standard deviation and the interval string
# produced by ci95().
summarise(
  data,
  mean = mean(deg_anger),
  SD = sd(deg_anger),
  CI = ci95(deg_anger)
)
## # A tibble: 1 x 3
##    mean    SD CI           
##   <dbl> <dbl> <chr>        
## 1  1.40  1.71 (-1.96, 4.76)
# Anger summary statistics computed separately for each Q3 group.
desc2 <- data %>%
  group_by(Q3) %>%
  summarise(
    mean = mean(deg_anger),
    SD = sd(deg_anger),
    CI = ci95(deg_anger)
  )
desc2
## # A tibble: 2 x 4
##   Q3     mean    SD CI           
##   <chr> <dbl> <dbl> <chr>        
## 1 1      1.48  1.68 (-1.81, 4.78)
## 2 2      1.29  1.79 (-2.23, 4.8)
# Anger scores split by Q3 (the surrounding text codes 1 = male,
# 2 = female). Q3 is stored as character, so it is compared against string
# literals instead of relying on implicit number-to-string coercion.
males <- data %>%
  filter(Q3 == "1") %>%
  pull(deg_anger)
females <- data %>%
  filter(Q3 == "2") %>%
  pull(deg_anger)

# One histogram per group to eyeball the score distributions.
data %>%
  filter(Q3 == "1") %>%
  ggplot(aes(deg_anger)) +
  geom_histogram(binwidth = 2)

data %>%
  filter(Q3 == "2") %>%
  ggplot(aes(deg_anger)) +
  geom_histogram(binwidth = 2)

# Pooled-variance two-sample t-test. TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned.
t.test(males, females, var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  males and females
## t = 0.39769, df = 48, p-value = 0.6926
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.7991771  1.1932657
## sample estimates:
## mean of x mean of y 
##  1.482759  1.285714