Query GPT on a dataframe's column
Usage
query_gpt_on_column(
db,
text_column,
sys_prompt = NULL,
usr_prompt = NULL,
closing = NULL,
model = "gpt-4o-mini",
quiet = TRUE,
max_try = 10,
temperature = 0,
max_tokens = NULL,
endpoint = "https://api.openai.com/v1/chat/completions",
add = TRUE,
simplify = TRUE,
na_if_error = FALSE,
res_name = "gpt_res",
.progress = TRUE,
seed = NULL,
use_py = FALSE
)
Arguments
- db
(data.frame) the data to use
- text_column
(chr) the name of the column containing the text data
- sys_prompt
(chr, default = NULL) the system prompt to use
- usr_prompt
(chr, default = NULL) the user prompt to use
- closing
(chr, default = NULL) Text to include at the end of the prompt
- model
(chr, default = "gpt-4o-mini") the model to use
- quiet
(lgl, default = TRUE) whether to suppress printed information
- max_try
(int, default = 10) the maximum number of tries
- temperature
(dbl, default = 0) the temperature to use
- max_tokens
(dbl, default = NULL) the maximum number of tokens
- endpoint
(chr, default = "https://api.openai.com/v1/chat/completions", i.e. the OpenAI API) the endpoint to use for the request.
- add
(lgl, default = TRUE) whether to add the result to the original dataframe. If FALSE, it returns a tibble with the result only.
- simplify
(lgl, default = TRUE) whether to simplify the output
- na_if_error
(lgl, default = FALSE) whether to return NA if an error occurs
- res_name
(chr, default = "gpt_res") the name of the column containing the result
- .progress
(lgl, default = TRUE) whether to show a progress bar or not
- seed
(chr, default = NULL) a string used as the seed for random number generation
- use_py
(lgl, default = FALSE) whether to use python or not
Examples
# Illustrative only: wrapped in `if (FALSE)` so the block is never executed.
if (FALSE) {
# Toy data: one text column ("commenti") holding three course-feedback comments.
db <- tibble(
commenti = c(
"deadly boring!",
"A bit boring, but interesting",
"How nice, I loved it!"
)
)
# Prompt building blocks, written in Italian: the assistant's role and context,
# then the task, instructions, expected output, style, and two labelled examples
# for classifying a comment as 'soddisfatto' or 'insoddisfatto'.
role <- "Sei l'assistente di un docente universitario."
context <- "State analizzando i commenti degli studenti dell'ultimo corso."
task <- "Il tuo compito è capire se sono soddisfatti del corso."
instructions <- "Analizza i commenti e decidi se sono soddisfatti o meno."
output <- "Riporta 'soddisfatto' o 'insoddisfatto'."
style <- "Non aggiungere nessun commento, restituisci solo ed
esclusivamente la classificazione."
examples <- "
commento_1: 'Mi è piaciuto molto il corso; davvero interessante.'
classificazione_1: 'soddisfatto'
commento_2: 'Non mi è piaciuto per niente; una noia mortale'
classificazione_2: 'insoddisfatto'
"
# Build the system and user prompts from the pieces above.
# NOTE(review): compose_sys_prompt() / compose_usr_prompt() are not defined on
# this page -- presumably helpers from the same package; confirm.
sys_prompt <- compose_sys_prompt(role = role, context = context)
usr_prompt <- compose_usr_prompt(
task = task, instructions = instructions, output = output,
style = style, examples = examples
)
# Query the "commenti" column of db with the composed prompts. Every other
# argument keeps its default (per the argument list: add = TRUE, so the result
# is added to the original data frame; res_name = "gpt_res" names that column).
res <- db |>
query_gpt_on_column(
"commenti", sys_prompt = sys_prompt, usr_prompt = usr_prompt
)
res
}