Step 1: Create a CSV file
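The tutorial does not show this step explicitly. A minimal sketch, assuming you just need a small file of text to mine (the file name and contents below are illustrative, not part of the original):

# Write a few lines of sample text to "sample_text.csv" (illustrative only)
sample_text <- data.frame(text = c("R makes text mining straightforward",
                                   "Word clouds visualise word frequencies",
                                   "The tm package handles corpus cleaning"))
write.csv(sample_text, "sample_text.csv", row.names = FALSE)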

Step 2: Install and load the required packages

# for text mining
install.packages("tm")
# for text stemming
install.packages("SnowballC")
# for the word-cloud generator
install.packages("wordcloud")
# for colour palettes
install.packages("RColorBrewer")

# Load
library("tm")
library("SnowballC")
library("wordcloud")
library("RColorBrewer")

Step 3: Text mining

1. Load the text

text <- readLines(file.choose())

2. Load the data as a corpus

docs <- Corpus(VectorSource(text))

3. Inspect the content of the document

inspect(docs)

Text transformation

# Replace "/", "@" and "|" with a space
toSpace <- content_transformer(function(x, pattern) gsub(pattern, " ", x))
docs <- tm_map(docs, toSpace, "/")
docs <- tm_map(docs, toSpace, "@")
docs <- tm_map(docs, toSpace, "\\|")
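content_transformer() wraps an ordinary function such as gsub() so that tm_map() can apply it to every document in the corpus. Replacing "/", "@" and "|" with spaces separates words that were glued to those characters before the cleaning steps below run.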

Cleaning the text

# Convert the text to lower case
docs <- tm_map(docs, content_transformer(tolower))
# Remove numbers
docs <- tm_map(docs, removeNumbers)
# Remove common English stopwords
docs <- tm_map(docs, removeWords, stopwords("english"))
# Remove your own stopwords, specified as a character vector
docs <- tm_map(docs, removeWords, c("blabla1", "blabla2"))
# Remove punctuation
docs <- tm_map(docs, removePunctuation)
# Eliminate extra whitespace
docs <- tm_map(docs, stripWhitespace)
# Text stemming (reduce words to their root form)
docs <- tm_map(docs, stemDocument)
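A quick sanity check (my addition, not part of the original tutorial) to confirm the transformations took effect:

# Print the first cleaned document
writeLines(as.character(docs[[1]]))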

Step 4: Build a term-document matrix

dtm <- TermDocumentMatrix(docs)
m <- as.matrix(dtm)
v <- sort(rowSums(m), decreasing = TRUE)    # term frequencies, most frequent first
d <- data.frame(word = names(v), freq = v)
head(d, 10)

Step 5: Generate the word cloud

set.seed(1234)
wordcloud(words = d$word, freq = d$freq, min.freq = 1,
          max.words = 200, random.order = FALSE, rot.per = 0.35,
          colors = brewer.pal(8, "Dark2"))
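A few notes on the arguments: min.freq drops words that occur fewer times than the threshold, max.words caps how many words are drawn, random.order = FALSE places the most frequent words in the centre, rot.per sets the fraction of words rotated 90 degrees, and brewer.pal(8, "Dark2") supplies eight colours from the RColorBrewer Dark2 palette. set.seed(1234) makes the layout reproducible.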

Explore frequent terms and their associations

findFreqTerms(dtm, lowfreq = 4)

findAssocs(dtm, terms = "app", corlimit = 0.3)
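findFreqTerms() lists the terms that occur at least lowfreq times in the matrix, while findAssocs() returns the terms whose correlation with "app" across documents is at least corlimit (here 0.3).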


Plot word frequencies

barplot(d[1:10,]$freq, las = 2, names.arg = d[1:10,]$word,
        col = "lightblue", main = "Most frequent words",
        ylab = "Word frequencies")

Read the text file from the internet


filePath <- "http://www.sthda.com/sthda/RDoc/example-files/martin-luther-king-i-have-a-
dream-speech.txt"
text <- readLines(filePath)
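readLines() may warn about an incomplete final line; the warning is harmless and the text is read in full.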
How to fetch data from Twitter?

Go to https://developer.twitter.com/en/apps and create an app to obtain your API keys.

# Load twitteR, which provides setup_twitter_oauth() and searchTwitter()
library("twitteR")

# Replace the placeholders below with the keys from your own Twitter app
# (never publish real credentials)
consumerKey <- "YOUR_CONSUMER_KEY"
consumerSecret <- "YOUR_CONSUMER_SECRET"
accessToken <- "YOUR_ACCESS_TOKEN"
accessTokenSecret <- "YOUR_ACCESS_TOKEN_SECRET"
setup_twitter_oauth(consumerKey, consumerSecret, accessToken, accessTokenSecret)
tweets <- searchTwitter("dearicaipleasechange", n = 1000, lang = "en")
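searchTwitter() returns a list of status objects matching the query; n = 1000 asks for up to 1,000 tweets and lang = "en" restricts the results to English. The standard search API only covers recent tweets, so fewer than n may come back.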

How to save the tweets?


tweet.df <- twListToDF(tweets)      # convert the list of status objects to a data frame
write.csv(tweet.df, "tweets1.csv")  # save to the working directory
getwd()                             # print that directory so you can find the file
Sentiment Analysis

install.packages("SentimentAnalysis")
library(SentimentAnalysis)

# Analyze a single string to obtain a binary response (positive / negative)
sentiment <- analyzeSentiment("Yeah, this was a great soccer game for the German team!")
convertToBinaryResponse(sentiment)$SentimentQDAP
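analyzeSentiment() returns a data frame with one score column per built-in dictionary (GI, HE, LM and QDAP); convertToBinaryResponse() turns each numeric score into a positive/negative factor, and $SentimentQDAP selects the result based on the QDAP dictionary.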

# Create a vector of strings
documents <- c("Wow, I really like the new light sabers!",
               "That book was excellent.",
               "R is a fantastic language.",
               "The service in this restaurant was miserable.",
               "This is neither positive or negative.",
               "The waiter forget about my dessert -- what poor service!")

sentiment <- analyzeSentiment(documents)
sentiment$SentimentQDAP

# View sentiment direction (i.e. positive, neutral and negative)

convertToDirection(sentiment$SentimentQDAP)
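Unlike convertToBinaryResponse(), convertToDirection() maps each score to one of three levels, with scores of zero treated as neutral.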
The same workflow scales to a whole corpus. The tm package ships with the crude dataset, a small collection of Reuters news articles about crude oil:

library(tm)
data("crude")

# Analyze sentiment
sentiment <- analyzeSentiment(crude)

# Count positive and negative news releases
table(convertToBinaryResponse(sentiment$SentimentLM))
# News releases with the highest and lowest sentiment
crude[[which.max(sentiment$SentimentLM)]]$meta$heading
crude[[which.min(sentiment$SentimentLM)]]$meta$heading
# Visualize distribution of standardized sentiment variable
hist(scale(sentiment$SentimentLM))
# Compute cross-correlation
cor(sentiment[, c("SentimentLM", "SentimentHE", "SentimentQDAP")])
# Crude oil news between 1987-02-26 and 1987-03-02
datetime <- do.call(c, lapply(crude, function(x) x$meta$datetimestamp))
plotSentiment(sentiment$SentimentLM)
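plotSentiment() draws the scores in document order. The datetime vector built above is not passed to it; a minimal base-R sketch (my addition) that plots the LM sentiment against those timestamps instead:

# Plot LM sentiment against the article timestamps (illustrative only)
plot(datetime, sentiment$SentimentLM, type = "b",
     xlab = "Date", ylab = "LM sentiment")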
Twitter Sentiment Analysis

# Install required packages
install.packages("SnowballC")
install.packages("tm")
install.packages("twitteR")
install.packages("syuzhet")

# Load them
library("SnowballC")
library("tm")
library("twitteR")
library("syuzhet")

tweets.df <- twListToDF(tweets)
head(tweets.df)
head(tweets.df$text)
# Remove URLs, hashtags and Twitter handles
tweets.df2 <- gsub("http.*", "", tweets.df$text)
tweets.df2 <- gsub("https.*", "", tweets.df2)
tweets.df2 <- gsub("#.*", "", tweets.df2)
tweets.df2 <- gsub("@.*", "", tweets.df2)

We will first get an emotion score for each tweet. The get_nrc_sentiment() function from 'syuzhet' uses the NRC lexicon to score each tweet on eight emotions (anger, anticipation, disgust, fear, joy, sadness, surprise and trust) plus two sentiments (negative and positive), ten columns in total.
word.df <- as.vector(tweets.df2)
emotion.df <- get_nrc_sentiment(word.df)
emotion.df2 <- cbind(tweets.df2, emotion.df)
head(emotion.df2)
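To see which emotions dominate across all tweets, a short sketch (my addition) that totals each NRC column and plots the counts:

# Sum each emotion/sentiment column across tweets and plot the totals
emotion.totals <- colSums(emotion.df)
barplot(sort(emotion.totals, decreasing = TRUE), las = 2,
        col = "lightblue", main = "NRC emotion counts")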
Most positive and most negative tweets

sent.value <- get_sentiment(word.df)

most.positive <- word.df[sent.value == max(sent.value)]
most.positive

most.negative <- word.df[sent.value == min(sent.value)]
most.negative

Sentiment score for each tweet

sent.value
Segregating positive, negative and neutral tweets

positive.tweets <- word.df[sent.value > 0]
head(positive.tweets)

negative.tweets <- word.df[sent.value < 0]
head(negative.tweets)

neutral.tweets <- word.df[sent.value == 0]
head(neutral.tweets)
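A compact way (my addition) to count how many tweets fall into each bucket:

# Label each tweet by the sign of its score and tabulate
category <- ifelse(sent.value > 0, "positive",
                   ifelse(sent.value < 0, "negative", "neutral"))
table(category)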
