87 Lecture: Applications of Large Language Models in Data Science

You can follow along with the slides here if they do not appear below.

87.1 Use Cases in Data Science

87.1.1 R Example: Text Classification (Sentiment Analysis)

library(tidytext)
library(dplyr)
library(ggplot2)

# Five short example reviews used throughout this section
texts <- c(
  "I love this product! It's amazing.",
  "This is terrible. I hate it.",
  "It's okay, nothing special.",
  "Wow, absolutely fantastic experience!",
  "Disappointed with the quality."
)

# Wrap the reviews in a one-column tibble: one row per review, column `text`
df <- tibble(
  text = texts
)


# Split each review into one word per row, then keep only the words that appear
# in the Bing lexicon, together with their positive/negative label.
# The join key is given explicitly so dplyr does not have to guess it (and does
# not print a "Joining with ..." message).
df %>%
  unnest_tokens(word, text) %>%
  inner_join(get_sentiments("bing"), by = "word")
#> # A tibble: 7 × 2
#>   word         sentiment
#>   <chr>        <chr>    
#> 1 love         positive 
#> 2 amazing      positive 
#> 3 terrible     negative 
#> 4 hate         negative 
#> 5 wow          positive 
#> 6 fantastic    positive 
#> 7 disappointed negative

## Tokenize and Perform Sentiment Analysis
# Count the lexicon matches of each polarity directly with summarise().
# This needs no reshaping step (and therefore no tidyr), and the score is still
# defined when the texts contain no positive or no negative words: the missing
# side simply counts as 0.
sentiment_scores <- df %>%
  unnest_tokens(word, text) %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  summarise(
    negative = sum(sentiment == "negative"),
    positive = sum(sentiment == "positive"),
    # Net sentiment: > 0 leans positive, < 0 leans negative
    sentiment_score = positive - negative
  )

sentiment_scores
#> # A tibble: 1 × 3
#>   negative positive sentiment_score
#>      <int>    <int>           <int>
#> 1        3        4               1

## Visualize Sentiment Analysis Results
# Bar chart of how many lexicon-matched words fall into each sentiment class.
# The join key is explicit, so no "Joining with ..." message is printed.
df %>%
  unnest_tokens(word, text) %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  ggplot(aes(x = sentiment)) +
  geom_bar() +
  coord_flip() + # horizontal bars
  theme_classic() +
  labs(title = "Overall Vibes", x = "", y = "Count")

87.1.2 Text Generation (Simple Markov Chain)

library(tidytext)
library(dplyr)
library(stringr)

# Toy corpus: three short sentences that reuse a few words
text <- "The quick brown fox jumps over the lazy dog. The dog barks at the fox. The fox runs away quickly."

# Pair every token with the token that follows it. The final token has no
# follower, so lead() yields NA for it and na.omit() drops that row.
word_pairs <- tibble(text = text) %>%
  unnest_tokens(output = word, input = text) %>%
  mutate(next_word = lead(word, n = 1L)) %>%
  na.omit()

# Collapse to one row per distinct word, collecting every observed follower
# (repeats included) in the list column `next_words`
markov_chain <- word_pairs %>%
  group_by(word) %>%
  summarise(next_words = list(next_word))

#' Generate text by a random walk over a word-level Markov chain
#'
#' Starting from `start_word`, repeatedly looks up the words recorded as
#' followers of the current word and samples one of them. A follower that was
#' recorded several times is proportionally more likely to be picked. The walk
#' stops early when the current word has no recorded follower.
#'
#' @param start_word A single non-missing string: the first word of the output.
#' @param length Maximum number of words to append after `start_word`
#'   (a single non-negative number). `0` returns `start_word` unchanged.
#' @param chain A data frame with a `word` column and a list column
#'   `next_words` holding the followers of each word. Defaults to the
#'   `markov_chain` object visible from where this function is defined.
#' @return A single string: the generated words separated by spaces.
generate_text <- function(start_word, length = 10, chain = markov_chain) {
  # The argument `length` is a number, so calls such as length(x) below still
  # resolve to base::length(); R skips non-function bindings when calling.
  stopifnot(
    is.character(start_word), length(start_word) == 1L, !is.na(start_word),
    is.numeric(length), length(length) == 1L, is.finite(length), length >= 0
  )
  n_steps <- as.integer(length)

  # Pull the lookup columns out once instead of filtering the table each step
  words <- chain[["word"]]
  successors <- chain[["next_words"]]

  # Preallocate for the longest possible walk; trimmed before returning
  result <- character(n_steps + 1L)
  result[1L] <- start_word
  n_words <- 1L
  current_word <- start_word

  # seq_len() yields an empty sequence for 0 steps, whereas 1:0 would run twice
  for (i in seq_len(n_steps)) {
    next_word_options <- unlist(successors[which(words == current_word)])

    # Dead end: the current word was never followed by anything
    if (length(next_word_options) == 0) break

    next_word <- sample(next_word_options, 1)
    n_words <- n_words + 1L
    result[n_words] <- next_word
    current_word <- next_word
  }

  paste(result[seq_len(n_words)], collapse = " ")
}

# Generate a sentence starting from "the" with up to 10 further words.
# Followers are sampled at random, so the result may differ between runs
# unless a seed is set beforehand.
generate_text(start_word = "the", length = 10)
#> [1] "the quick brown fox jumps over the dog barks at the"