# Page 1 of 2

# Packages whose functions this script calls: readxl (read_excel), dplyr
# (%>%, select, count, group_by, ...), tidyr (drop_na), tidytext
# (unnest_tokens, bind_tf_idf, stop_words) and ggplot2 (plotting).
library(topicmodels)
library(readxl)
library(dplyr)
library(tidyr)
library(tidytext)
library(ggplot2)

# Read the raw posts.
# Replace Facebook_Posts_EricFang.xlsx with the data file name.
data <- read_excel("Facebook_Posts_EricFang.xlsx")

# Build the table the rest of the script works on: one row per post with the
# company identifier and the post text. The company column is named `company`
# here directly, which replaces the separate rename step.
# NOTE(review): assumes `data` has columns `company_urlname` and `message`
# (inferred from how `message` is used further down) -- confirm against the
# spreadsheet.
# `message` shadows base::message() as a variable name; calls to message()
# still resolve to the function.
message <- data %>%
  select(company = company_urlname, message)

colnames(message)

message

### Tokenise the posts: one row per word occurrence
# unnest_tokens() splits the `message` column into a `word` column,
# anti_join() drops every word listed in tidytext's `stop_words`, and
# drop_na() removes rows with missing values. The result of drop_na() is
# kept (assigned as part of the pipeline) so that everything computed from
# `tidy_m` afterwards works on NA-free rows.
tidy_m <- message %>%
  unnest_tokens(word, message) %>%
  anti_join(stop_words, by = "word") %>% ## remove stop_words
  drop_na()

# Show the cleaned token table.
tidy_m

## Word frequencies per company
# One row per (company, word) pair with its number of occurrences in `n`;
# the result is an ungrouped table.
twords <- count(tidy_m, company, word)

## Overall word frequencies
# Remove rows containing missing values and keep the cleaned table.
tidy_m <- drop_na(tidy_m)

# Print every word with its total count, most frequent first.
count(tidy_m, word, sort = TRUE)


## tf-idf
# Count words per company, compute tf-idf with `word` as the term and
# `company` as the document, keep the 10 highest-scoring words of each
# company, and draw one horizontal bar panel per company.
tidy_m %>%
  count(company, word, sort = TRUE) %>%
  bind_tf_idf(word, company, n) %>%
  group_by(company) %>%
  # Rank explicitly by tf_idf instead of relying on top_n()'s implicit
  # "last column" weight.
  top_n(10, tf_idf) %>%
  ungroup() %>%
  # Order the word factor by tf_idf so the bars are drawn in score order.
  mutate(word = reorder(word, tf_idf)) %>%
  ggplot(aes(word, tf_idf, fill = company)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~company, scales = "free") +
  coord_flip()

# You may also like