Skip to contents

Query GPT on a dataframe's column

Usage

query_gpt_on_column(
  db,
  text_column,
  sys_prompt = NULL,
  usr_prompt = NULL,
  closing = NULL,
  model = "gpt-4o-mini",
  quiet = TRUE,
  max_try = 10,
  temperature = 0,
  max_tokens = NULL,
  endpoint = "https://api.openai.com/v1/chat/completions",
  add = TRUE,
  simplify = TRUE,
  na_if_error = FALSE,
  res_name = "gpt_res",
  .progress = TRUE,
  seed = NULL,
  use_py = FALSE
)

Arguments

db

(data.frame) the data to use

text_column

(chr) the name of the column containing the text data

sys_prompt

(chr, default = NULL) the system prompt to use

usr_prompt

(chr, default = NULL) the user prompt to use

closing

(chr, default = NULL) Text to include at the end of the prompt

model

(chr, default = "gpt-4o-mini") the model to use

quiet

(lgl, default = TRUE) whether to suppress informational messages; set to FALSE to print them

max_try

(int, default = 10) the maximum number of tries

temperature

(dbl, default = 0) the temperature to use

max_tokens

(dbl, default = NULL) the maximum number of tokens

endpoint

(chr, default = "https://api.openai.com/v1/chat/completions", i.e. the OpenAI API) the endpoint to use for the request.

add

(lgl, default = TRUE) whether to add the result to the original dataframe. If FALSE, it returns a tibble with the result only.

simplify

(lgl, default = TRUE) whether to simplify the output

na_if_error

(lgl, default = FALSE) whether to return NA if an error occurs

res_name

(chr, default = "gpt_res") the name of the column containing the result

.progress

(lgl, default = TRUE) whether to show a progress bar or not

seed

(chr, default = NULL) a string used to seed the random number generator

use_py

(lgl, default = FALSE) whether to use python or not

Value

(tibble) the result of the query

Examples

if (FALSE) {

 db <- tibble(
   commenti = c(
     "deadly boring!",
     "A bit boring, but interesting",
     "How nice, I loved it!"
   )
 )

 role <- "Sei l'assistente di un docente universitario."
 context <- "State analizzando i commenti degli studenti dell'ultimo corso."
 task <- "Il tuo compito è capire se sono soddisfatti del corso."
 instructions <- "Analizza i commenti e decidi se sono soddisfatti o meno."
 output <- "Riporta 'soddisfatto' o 'insoddisfatto'."
 style <- "Non aggiungere nessun commento, restituisci solo ed
   esclusivamente la classificazione."
 examples <- "
 commento_1: 'Mi è piaciuto molto il corso; davvero interessante.'
 classificazione_1: 'soddisfatto'
 commento_2: 'Non mi è piaciuto per niente; una noia mortale'
 classificazione_2: 'insoddisfatto'
 "

 sys_prompt <- compose_sys_prompt(role = role, context = context)
 usr_prompt <- compose_usr_prompt(
   task = task, instructions = instructions, output = output,
   style = style, examples = examples
 )
 res <- db |>
  query_gpt_on_column(
    "commenti", sys_prompt = sys_prompt, usr_prompt = usr_prompt
  )
 res
}