Query GPT on a dataframe's column

Usage

query_gpt_on_column(
  db,
  text_column,
  sys_prompt = NULL,
  usr_prompt = NULL,
  closing = NULL,
  model = "gpt-4o-mini",
  quiet = TRUE,
  max_try = 10,
  temperature = 0,
  max_tokens = NULL,
  endpoint = "https://api.openai.com/v1/chat/completions",
  add = TRUE,
  simplify = TRUE,
  na_if_error = FALSE,
  res_name = "gpt_res",
  .progress = TRUE,
  seed = NULL,
  use_py = FALSE
)

Arguments

db: (data.frame) the data to use
text_column: (chr) the name of the column containing the text data
sys_prompt: (chr, default = NULL) the system prompt to use
usr_prompt: (chr, default = NULL) the user prompt to use
closing: (chr, default = NULL) Text to include at the end of the prompt
model: (chr, default = "gpt-4o-mini") the model to use
quiet: (lgl, default = TRUE) whether to suppress informational messages; set to FALSE to print them
max_try: (int, default = 10) the maximum number of tries
temperature: (dbl, default = 0) the temperature to use
max_tokens: (dbl, default = NULL) the maximum number of tokens
endpoint: (chr, default = "https://api.openai.com/v1/chat/completions", i.e. the OpenAI API) the endpoint to use for the request.
add: (lgl, default = TRUE) whether to add the result to the original dataframe. If FALSE, it returns a tibble with the result only.
simplify: (lgl, default = TRUE) whether to simplify the output
na_if_error: (lgl, default = FALSE) whether to return NA if an error occurs
res_name: (chr, default = "gpt_res") the name of the column containing the result
.progress: (lgl, default = TRUE) whether to show a progress bar or not
seed: (chr, default = NULL) a string used to seed the random number generator
use_py: (lgl, default = FALSE) whether to use Python to perform the query

Value

(tibble) the result of the query

Examples

if (FALSE) {

 db <- tibble(
   commenti = c(
     "deadly boring!",
     "A bit boring, but interesting",
     "How nice, I loved it!"
   )
 )

 role <- "Sei l'assistente di un docente universitario."
 context <- "State analizzando i commenti degli studenti dell'ultimo corso."
 task <- "Il tuo compito è capire se sono soddisfatti del corso."
 instructions <- "Analizza i commenti e decidi se sono soddisfatti o meno."
 output <- "Riporta 'soddisfatto' o 'insoddisfatto'."
 style <- "Non aggiungere nessun commento, restituisci solo ed
   esclusivamente la classificazione."
 examples <- "
 commento_1: 'Mi è piaciuto molto il corso; davvero interessante.'
 classificazione_1: 'soddisfatto'
 commento_2: 'Non mi è piaciuto per niente; una noia mortale'
 classificazione_2: 'insoddisfatto'
 "

 sys_prompt <- compose_sys_prompt(role = role, context = context)
 usr_prompt <- compose_usr_prompt(
   task = task, instructions = instructions, output = output,
   style = style, examples = examples
 )
 res <- db |>
  query_gpt_on_column(
    "commenti", sys_prompt = sys_prompt, usr_prompt = usr_prompt
  )
 res
}