Tweet War
Trump VS Biden
US Election 2020 Tweets War: Can a US election be determined by tweets? The 2020 US election took place on 3rd November 2020, and its impact on the world will no doubt be large, irrespective of which candidate is elected!
The tweets were collected using the Twitter API statuses_lookup endpoint and snscrape for keywords, with the original intention of updating the dataset daily so that the timeframe would eventually cover 15.10.2020 to 04.11.2020.
Needed libraries
library(tidyverse)
library(dplyr)
library(ggplot2)
library(wordcloud)
library(tm)
library(tidytext)
library(SnowballC)
library(R.utils)
library(stringr)
library(stopwords)
library(textdata)
Import the datasets
l2keep <- 100000  # keep only the last 100,000 tweets from each file
# read the header row separately, then skip to the tail of each file
column_namesDT <- as.vector(t(read.csv("hashtag_donaldtrump.csv", header=FALSE, colClasses='character', nrows=1)))
nLDT <- countLines("hashtag_donaldtrump.csv")
dfDT <- read.csv("hashtag_donaldtrump.csv", header=FALSE, col.names=column_namesDT, skip=nLDT-l2keep)
column_namesJB <- as.vector(t(read.csv("hashtag_joebiden.csv", header=FALSE, colClasses='character', nrows=1)))
nLJB <- countLines("hashtag_joebiden.csv")
dfJB <- read.csv("hashtag_joebiden.csv", header=FALSE, col.names=column_namesJB, skip=nLJB-l2keep)
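Before cleaning, it is worth a quick sanity check that both samples imported with the expected shape (a minimal check, assuming the two Kaggle CSVs are in the working directory):
# confirm row counts and column layout of the two samples
dim(dfDT)
dim(dfJB)
glimpse(dfDT)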
Clean the Data: Donald Trump Tweets
######### Turn text to tokens
tokensDT <- dfDT %>%
unnest_tokens(output = word, input = tweet)
###########Show common words
tokensDT %>%
count(word,
sort = TRUE)
######## Remove stop words
sw = get_stopwords()
cleaned_tokensDT <- tokensDT %>%
filter(!word %in% sw$word)
######### Remove numbers
numsDT <- cleaned_tokensDT %>%
filter(str_detect(word, "^[0-9]")) %>%
select(word) %>% unique()
cleaned_tokensDT <- cleaned_tokensDT %>%
filter(!word %in% numsDT$word)
###Remove rare words
rareDT <- cleaned_tokensDT %>%
count(word) %>%
filter(n<150) %>%
select(word) %>% unique()
cleaned_tokensDT <- cleaned_tokensDT %>%
filter(!word %in% rareDT$word)
cleaned_tokensDT = cleaned_tokensDT %>%
filter(!word %in% c("https", "t.co","ðÿ","à","ø"))
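As a quick check that the cleaning steps behaved as intended, the most frequent tokens that survive the filters can be listed (a small sketch using the word column created by unnest_tokens above):
# top 10 remaining tokens after stop-word, number and rare-word removal
cleaned_tokensDT %>%
count(word, sort = TRUE) %>%
head(10)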
Plot Tokens
# This histogram shows how many distinct words occur at each frequency in the Donald Trump tweets
#########Plot tokens
cleaned_tokensDT %>%
count(word, sort = T) %>%
rename(word_freq = n) %>%
ggplot(aes(x=word_freq)) +
geom_histogram(aes(y=..count..), color="black", fill="blue", alpha=0.3) +
scale_x_continuous(breaks=c(0:5,10,100,500,10e3), trans="log1p", expand=c(0,0)) +
scale_y_continuous(breaks=c(0,100,1000,5e3,10e3,5e4,10e4,4e4), expand=c(0,0)) +
theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
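The `stat_bin()` message is only a reminder that the default of 30 bins was used; passing an explicit `binwidth` silences it (the value below is an illustrative choice on the log1p-transformed axis, not a tuned one):
# same histogram with an explicit binwidth instead of the default 30 bins
cleaned_tokensDT %>%
count(word, sort = T) %>%
rename(word_freq = n) %>%
ggplot(aes(x=word_freq)) +
geom_histogram(binwidth = 0.25, color="black", fill="blue", alpha=0.3) +
scale_x_continuous(breaks=c(0:5,10,100,500,10e3), trans="log1p", expand=c(0,0)) +
theme_bw()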
Word Cloud for Donald Trump
library(wordcloud)
# define a nice color palette
pal <- brewer.pal(8,"Dark2")
# plot the 150 most common words
cleaned_tokensDT %>%
count(word) %>%
with(wordcloud(word, n, random.order = FALSE, max.words = 150, colors=pal))
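The word cloud layout has a random element (word placement and rotation), so the figure can change between knits; setting a seed first makes it reproducible (the seed value is arbitrary):
# fix the random layout so the cloud looks the same on every run
set.seed(1234)
cleaned_tokensDT %>%
count(word) %>%
with(wordcloud(word, n, random.order = FALSE, max.words = 150, colors=pal))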
Clean the Data: Joe Biden Tweets
# remove URL fragments from the tweet text before tokenising
dfJB <- dfJB %>%
mutate(tweet = str_remove_all(tweet, "https"))
######### Turn text to tokens
tokensJB <- dfJB %>%
unnest_tokens(output = word, input = tweet)
###########Show common words
tokensJB %>%
count(word,
sort = TRUE)
######## Remove stop words
sw = get_stopwords()
cleaned_tokensJB <- tokensJB %>%
filter(!word %in% sw$word)
######### Remove numbers
nums <- cleaned_tokensJB %>%
filter(str_detect(word, "^[0-9]")) %>%
select(word) %>% unique()
cleaned_tokensJB <- cleaned_tokensJB %>%
filter(!word %in% nums$word)
###Remove rare words
rareJB <- cleaned_tokensJB %>%
count(word) %>%
filter(n<150) %>%   # same rarity cutoff as the Trump sample, since both files are sampled to 100,000 rows
select(word) %>% unique()
cleaned_tokensJB <- cleaned_tokensJB %>%
filter(!word %in% rareJB$word)
length(unique(cleaned_tokensJB$word))
cleaned_tokensJB = cleaned_tokensJB %>%
filter(!word %in% c("https", "t.co","ðÿ","à","ø"))
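With both token sets now cleaned in the same way, the remaining vocabulary sizes can be compared directly:
# size of the cleaned vocabulary for each candidate
length(unique(cleaned_tokensDT$word))
length(unique(cleaned_tokensJB$word))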
Plot Tokens
# This histogram shows how many distinct words occur at each frequency in the Joe Biden tweets
cleaned_tokensJB %>%
count(word, sort = T) %>%
rename(word_freq = n) %>%
ggplot(aes(x=word_freq)) +
geom_histogram(aes(y=..count..), color="black", fill="blue", alpha=0.3) +
scale_x_continuous(breaks=c(0:5,10,100,500,10e3), trans="log1p", expand=c(0,0)) +
scale_y_continuous(breaks=c(0,100,1000,5e3,10e3,5e4,10e4,4e4), expand=c(0,0)) +
theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Word Cloud for Joe Biden
library(wordcloud)
# define a nice color palette
pal <- brewer.pal(8,"Dark2")
# plot the 150 most common words
cleaned_tokensJB %>%
count(word) %>%
with(wordcloud(word, n, random.order = FALSE, max.words = 150, colors=pal))
Joe Biden Sentiment Analysis
############ Sentiment Analysis
get_sentiments("nrc")
## # A tibble: 13,901 x 2
## word sentiment
## <chr> <chr>
## 1 abacus trust
## 2 abandon fear
## 3 abandon negative
## 4 abandon sadness
## 5 abandoned anger
## 6 abandoned fear
## 7 abandoned negative
## 8 abandoned sadness
## 9 abandonment anger
## 10 abandonment fear
## # ... with 13,891 more rows
get_sentiments("bing")
## # A tibble: 6,786 x 2
## word sentiment
## <chr> <chr>
## 1 2-faces negative
## 2 abnormal negative
## 3 abolish negative
## 4 abominable negative
## 5 abominably negative
## 6 abominate negative
## 7 abomination negative
## 8 abort negative
## 9 aborted negative
## 10 aborts negative
## # ... with 6,776 more rows
get_sentiments("afinn")
## # A tibble: 2,477 x 2
## word value
## <chr> <dbl>
## 1 abandon -2
## 2 abandoned -2
## 3 abandons -2
## 4 abducted -2
## 5 abduction -2
## 6 abductions -2
## 7 abhor -3
## 8 abhorred -3
## 9 abhorrent -3
## 10 abhors -3
## # ... with 2,467 more rows
sent_reviewsJB = cleaned_tokensJB %>%
left_join(get_sentiments("nrc")) %>%
rename(nrc = sentiment) %>%
left_join(get_sentiments("bing")) %>%
rename(bing = sentiment) %>%
left_join(get_sentiments("afinn")) %>%
rename(afinn = value)
## Joining, by = "word"
## Joining, by = "word"
## Joining, by = "word"
######## Most common positive and negative words
bing_word_countsJB <- sent_reviewsJB %>%
filter(!is.na(bing)) %>%
count(word, bing, sort = TRUE)
bing_word_countsJB %>%
filter(n > 1000) %>%
mutate(n = ifelse(bing == "negative", -n, n)) %>%
mutate(word = reorder(word, n)) %>%
ggplot(aes(word, n, fill = bing)) +
geom_col() +
coord_flip() +
labs(y = "Contribution to sentiment")
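Besides the positive/negative bing split, the nrc column joined above also carries emotion categories; a quick tally (ignoring tokens with no NRC match) gives a rough emotional profile of the Biden tweets:
# count NRC emotion/sentiment categories across the Biden tokens
sent_reviewsJB %>%
filter(!is.na(nrc)) %>%
count(nrc, sort = TRUE)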
Donald Trump Sentiment Analysis
sent_reviewsDT = cleaned_tokensDT %>%
left_join(get_sentiments("nrc")) %>%
rename(nrc = sentiment) %>%
left_join(get_sentiments("bing")) %>%
rename(bing = sentiment) %>%
left_join(get_sentiments("afinn")) %>%
rename(afinn = value)
## Joining, by = "word"
## Joining, by = "word"
## Joining, by = "word"
######## Most common positive and negative words
bing_word_countsDT <- sent_reviewsDT %>%
filter(!is.na(bing)) %>%
count(word, bing, sort = TRUE)
bing_word_countsDT %>%
filter(n > 1000) %>%
mutate(n = ifelse(bing == "negative", -n, n)) %>%
mutate(word = reorder(word, n)) %>%
ggplot(aes(word, n, fill = bing)) +
geom_col() +
coord_flip() +
labs(y = "Contribution to sentiment")
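As a rough closing comparison (a simple summary, not a formal test), the afinn scores joined above can be averaged to contrast the overall tone of the two tweet samples:
# mean AFINN score per candidate, dropping tokens with no AFINN match
bind_rows(
sent_reviewsDT %>% filter(!is.na(afinn)) %>% summarise(candidate = "Trump", mean_afinn = mean(afinn)),
sent_reviewsJB %>% filter(!is.na(afinn)) %>% summarise(candidate = "Biden", mean_afinn = mean(afinn))
)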