SDK features using concrete examples¶
These examples walk through some features of the platform in more detail.
Root Signals evaluators¶
Root Signals provides over 30 ready-made evaluators that can be used to validate any textual content.
from root import RootSignals
# Connect to the Root Signals API
client = RootSignals()
result = client.evaluators.Clarity(
response="""I only use direct language without any weasel words.
I am clear and concise.""",
)
print(result.score)
Adjust evaluator behavior¶
An evaluator can be calibrated to adjust its behavior.
from root import RootSignals
from root.skills import EvaluatorDemonstration
client = RootSignals()
# Create an evaluator
direct_language_evaluator = client.evaluators.create(
name="Direct language",
predicate="Is the following text clear and has no weasel words: {{output}}",
intent="Is the language direct and unambiguous",
model="gpt-4o",
)
# Run first calibration
test_result = client.evaluators.calibrate_existing(
evaluator_id=direct_language_evaluator.id,
test_data=[
["0.1", "There will probably be a meeting tomorrow"],
["0.1", "We probably won't need to make any major changes."],
],
)
print(test_result[0].result)
# Improve the evaluator with demonstrations, penalize the "probably" weasel word
client.skills.update(
skill_id=direct_language_evaluator.id,
evaluator_demonstrations=[
EvaluatorDemonstration(
output="The project will probably be completed on time.",
score=0.1,
),
EvaluatorDemonstration(
output="He probably knows the answer to your question.",
score=0.1,
),
EvaluatorDemonstration(output="It will probably rain later today.", score=0.1),
],
)
# Run second calibration
test_result = client.evaluators.calibrate_existing(
evaluator_id=direct_language_evaluator.id,
test_data=[
["0.1", "There will probably be a meeting tomorrow"],
["0.1", "We probably won't need to make any major changes."],
],
)
# Check the results
print(test_result[0].result)
// print(test_result[0].result)
"score": "0.5",
"expected_score": "0.1",
// print(test_result[0].result)
"score": "0.1",
"expected_score": "0.1",
Retrieval Augmented Generation (RAG) evaluation¶
For RAG, there are special evaluators that can separately measure the different intermediate components of a RAG pipeline, in addition to the final output.
from root import RootSignals
# Connect to the Root Signals API
client = RootSignals()
result = client.evaluators.Truthfulness(
request="Return a recipe for a sauce.",
response="""Quarter peppers and slice onion.
I used the seeds and ribs of the peppers for optimal heat.
Sautee in a small sauce pan with oil on medium heat.
Keep a lid on it but stir often.""",
contexts=[
"This is a cookbook with many cookies recipes such as: " "1) Recipe for a tomato sauce",
],
)
print(result.score)
// print(result.score)
"0.0"
Minimal Skill¶
Skills are measurable units of automation powered by LLMs. The APIs typically respond with Python objects that can be used to chain requests, or alternatively to reuse the results of previous calls.
from root import RootSignals
# Connect to the Root Signals API
client = RootSignals()
# Create a skill
skill = client.skills.create(
"""
Classify this text into one of the following: {{categories}}
Text: {{text}}
"""
)
# Execute it
response = skill.run(
{
"text": "The expectation for rate cuts has been steadily declining.",
"categories": "Finance, Sports, Politics",
}
)
print(response)
// print(response)
"llm_output": "Finance",
"validation": "Validation(is_valid=True, validator_results=[])",
"model": "gpt-4o",
"execution_log_id": "9b3c713d-7bdc-4f7d-a85c-ed7d92ff4a56",
"rendered_prompt": "Classify this text into ...",
"cost": "5.6e-05",
Simple Skill¶
The simple skill example adds some more metadata to the skill. It explicitly specifies the model to use, the descriptive intent, and the input variables referred to in the prompt.
from root import RootSignals
# Connect to the Root Signals API
client = RootSignals()
# Create a skill
skill = client.skills.create(
name="My text classifier",
intent="To classify text into arbitrary categories based on semantics",
prompt="""
Classify this text into one of the following: {{categories}}
Text: {{text}}
""",
model="gpt-4",
)
# Execute
response = skill.run(
{
"text": "The expectation for rate cuts has been steadily declining.",
"categories": "Finance, Sports, Politics",
}
)
print(response)
# We can retrieve the skill by id
skill_2 = client.skills.get(skill_id=skill.id)
response = skill_2.run(
{
"text": "The expectation for rate cuts has been steadily declining.",
"categories": "Finance, Sports, Politics",
}
)
# We can also retrieve it by name
# (the list result is an iterator, so we just take the first one)
#
# The name is not a unique identifier. Consequently, the .run method is
# intentionally not available here. However, you can circumvent this
# restriction if you wish by using:
skill_3 = next(client.skills.list(name="My text classifier"))
response = client.skills.run(
skill_3.id,
{
"text": "The expectation for rate cuts has been steadily declining.",
"categories": "Finance, Sports, Politics",
},
)
// print(response)
"llm_output": "Finance",
"validation": "Validation(is_valid=True, validator_results=[])",
"model": "gpt-4",
"execution_log_id": "1181e790-7b87-457f-a2cb-6b1dfc1eddf4",
"rendered_prompt": "Classify this text into ...",
"cost": "0.00093",
Skill with a validator¶
In order to ensure the results of skill execution remain within acceptable guardrails, we can add a validator. In this example, the validator scores the result based on the clarity of the model output.
from root import RootSignals
from root.validators import Validator
# Connect to the Root Signals API
client = RootSignals()
skill = client.skills.create(
name="My strict chatbot",
intent="Simple Q&A chatbot",
prompt="Provide a clear answer to the question: {{question}}",
model="gpt-4",
validators=[Validator(evaluator_name="Clarity", threshold=0.6)],
)
# Execute the skill
response = skill.run({"question": "What is the capital of France?"})
# Get the validation results
print(response.validation)
# Get the execution details
log = client.execution_logs.get(execution_result=response)
print(log)
# List all execution logs
iterator = client.execution_logs.list(limit=10)
print(next(iterator))
// print(response.validation)
"validator_results": [
{
"cost": "0.000...",
"evaluator_name": "Clarity",
"threshold": "0.6",
"is_valid": "True",
"result": "0.xy"
}
]
// print(log)
"cost": "0.000...",
"skill": {
"name": "My Q&A chatbot",
"..."
},
"llm_output": "The capital of France is Paris.",
"rendered_prompt": "Provide a clear answer to the question: What is ...",
"validation_results": [
"evaluator_name": "Clarity",
"result": "0.9",
"is_valid": "true",
"..."
],
"..."
// print(next(iterator))
// Note that the list result does not contain the full execution details
{
"cost": "0.000..."
"skill": {
"name": "My Q&A chatbot"
}
"..."
Skill with reference data¶
Skills can leverage reference data, such as a document, to provide additional context to the model.
import tempfile
from root import RootSignals
from root.skills import ReferenceVariable
client = RootSignals()
with tempfile.NamedTemporaryFile(suffix=".csv", mode="w") as fp:
    fp.write("[email protected]\n[email protected]\n")
    fp.flush()
    dataset = client.datasets.create(name="List of email addresses", path=fp.name, type="reference")
skill = client.skills.create(
reference_variables=[ReferenceVariable(dataset_id=dataset.id, name="email_dataset")],
intent="Email address list assistant.",
name="Email address dataset chatbot",
prompt="{{email_dataset}}",
model="gpt-4",
)
response = skill.run(
{"email_dataset": ("Which email has the longest non-domain part?" "Respond with just the email address.")},
)
print(response.llm_output)
// print(response.llm_output)
[email protected]
Skill with a data loader¶
In this example, we add a data loader that loads the contents of a Wikipedia page into a data loader variable called kimchi_ingredients, which is then used to populate the prompt.
from root import RootSignals
from root.data_loader import WebPageDataLoader
# Create a client
client = RootSignals()
# Create a skill
skill = client.skills.create(
name="My kimchi cooking assistant",
intent="This skill will find you a recipe for kimchi.",
prompt=("Help me prepare Kimchi. " "Use this web page for ingredients {{kimchi_ingredients}}"),
model="gpt-4o",
fallback_models=["gpt-4"],
data_loaders=[
WebPageDataLoader(
name="kimchi_ingredients",
url="https://simple.wikipedia.org/wiki/Kimchi",
)
],
)
response = skill.run()
print(response)
// print(response)
"llm_output":"Kimchi, a traditional Korean side dish, is renowned for
its unique taste and health benefits. Made from salted and fermented
vegetables, it uses seasonings such as gochugaru (Korean chili
powder), ... Here are a few recipes for you to try at
home:\n\n1. Traditional Napa Cabbage Kimchi:\n\nIngredients:..."
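As with any skill run, the execution details can be fetched afterwards to inspect how the data loader populated the prompt; a short sketch reusing the execution_logs API shown in the validator example above:
# Fetch the execution log; among other fields it contains the rendered prompt
# with the loaded Wikipedia content substituted in.
log = client.execution_logs.get(execution_result=response)
print(log)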
Evaluator Skill and minimal version of it¶
We can also create an evaluator skill. Evaluator skills return only floating-point values between 0 and 1, based on how well the received output (of a skill) matches what the evaluator is designed to look for.
from root import RootSignals
from root.validators import Validator
client = RootSignals()
evaluator_skill = client.evaluators.create(
name="Cooking recipe",
intent="This skill will evaluate if the answer is a cooking recipe.",
predicate="Is the following a cooking recipe: {{output}}",
model="gpt-4o",
)
cooking_skill = client.skills.create(
name="Cooking skill with a custom evaluator",
prompt="Find me a good recipe for Italian food.",
validators=[
Validator(evaluator_id=evaluator_skill.id, threshold=0.1),
Validator(
evaluator_name="Truthfulness",
threshold=0.5,
),
],
)
response = cooking_skill.run()
# Check if the recipe was about cooking
print(response.validation)
// print(response.validation)
{
"validator_results": [
{
"evaluator_name": "Cooking recipe evaluator",
"evaluator_id": "...",
"threshold": "0.1",
"is_valid": "True",
"result": "0.9",
"status": "finished"
}
],
"is_valid": "True"}
The evaluator skill can also be created implicitly by supplying an evaluator_name and a prompt:
from root import RootSignals
from root.validators import Validator
client = RootSignals()
cooking_skill = client.skills.create(
name="Cooking skill with validators",
prompt="Find me a good recipe for Italian food.",
validators=[
Validator(
evaluator_name="Cooking recipe evaluator",
prompt="Is the following a cooking recipe: {{output}}",
threshold=0.1,
),
Validator(
evaluator_name="Truthfulness",
threshold=0.5,
),
],
)
response = cooking_skill.run()
# Check if the recipe was about cooking
print(response.validation)
// print(response.validation)
"validator_results": [
{
"evaluator_name": "Cooking recipe evaluator",
"evaluator_id": "...",
"threshold": "0.1",
"is_valid": "True",
"result": "0.8",
"status": "finished"
}
],
"is_valid": "True"}
Use OpenAI client for chat completions¶
Evaluators and monitoring can be added to your existing codebase using the OpenAI client. To do this, retrieve the base_url from the Root Signals SDK Skill, and then use the normal openai API client with it. There are two ways to do this:
Without streaming, the API returns the whole response to the call:
from openai import OpenAI
from root import RootSignals
from root.validators import Validator
# Connect to the Root Signals API
rs_client = RootSignals()
model = "gpt-4o"
another_model = "gpt-4"
skill = rs_client.skills.create(
name="My chatbot",
intent="Simple Q&A chatbot",
system_message="You are a helpful assistant.",
model=model,
fallback_models=[another_model],
validators=[Validator(evaluator_name="Truthfulness", threshold=0.8)],
)
# Start chatting with the skill (non-streaming)
client = OpenAI(base_url=skill.openai_base_url, api_key=rs_client.api_key)
messages = [
# {"role": "system", "content": "You are a helpful assistant."},
# ^ implicit in skill
{"role": "user", "content": "Why is the sky blue?"},
]
completion = client.chat.completions.create(model=model, messages=messages)
print(completion.choices[0].message.content)
# We can use either the model, or one of the fallback models defined for the
# skill. We will use the fallback model here.
messages = [
{"role": "user", "content": "Why is the sky blue?"},
]
completion = client.chat.completions.create(model=another_model, messages=messages)
print(completion.choices[0].message.content)
# We can get the full execution details, including the validation results
log = rs_client.execution_logs.get(log_id=completion.id)
print(log.validation_results)
// print(completion.choices[0].message.content)
The sky appears blue because of the way sunlight interacts ...
// print(completion.choices[0].message.content)
The sky appears blue because of the way sunlight interacts ...
// print(log.validation_results)
[
"evaluator_name": "Truthfulness"
"result": "0.9"
"is_valid": "true"
"..."
]
Do note that only models specified as either model or fallback_models of the created Skill are accepted by the API. Trying to use other model names will result in an error.
When streaming (stream=True), the API response will be provided as a generator which yields a set of chunks over time:
from openai import OpenAI
from root import RootSignals
from root.validators import Validator
# Connect to the Root Signals API
rs_client = RootSignals()
model = "gpt-4o"
skill = rs_client.skills.create(
name="My Q&A chatbot",
intent="Simple Q&A chatbot",
system_message="You are a helpful assistant.",
model=model,
validators=[Validator(evaluator_name="Truthfulness", threshold=0.8)],
)
# Start chatting with the skill
client = OpenAI(base_url=skill.openai_base_url, api_key=rs_client.api_key)
messages = [
{"role": "user", "content": "Why is the sky blue?"},
]
completion = client.chat.completions.create(model=model, messages=messages, stream=True)
for chunk in completion:
    print(chunk.choices[0].delta.content)
// print(chunk.choices[0].delta.content)
The sky appears blue because of the way sunlight interacts ...
Do note that if validators are in use, it is not possible to stream the response, as the response must be validated before it is returned to the caller. In that case (and possibly for other reasons too), the platform will simply return the final full response as a single chunk once the validators have finished evaluating it.
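Collecting the streamed chunks into a single string therefore works the same way in both cases; a minimal sketch using the standard OpenAI streaming interface:
# Works whether the answer arrives in many small chunks or, with validators
# enabled, as one final chunk; the last chunk's delta content may be None.
completion = client.chat.completions.create(model=model, messages=messages, stream=True)
answer = "".join(chunk.choices[0].delta.content or "" for chunk in completion)
print(answer)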
Evaluate your LLM pipeline by grouping validators to a Skill¶
We can group and track the results of any LLM pipeline using a skill.
from root import RootSignals
from root.validators import Validator
# Connect to the Root Signals API
client = RootSignals()
# Create an objective which describes what we are trying to do
objective = client.objectives.create(
intent="Child-safe clear response",
validators=[
Validator(evaluator_name="Clarity", threshold=0.2),
Validator(evaluator_name="Safety for Children", threshold=0.3),
],
)
# Create a skill which identifies only this particular LLM pipeline
skill = client.skills.create(name="My pipeline", objective_id=objective.id)
# Run your LLM pipeline and use the skill to evaluate the LLM output...
# llm_response = run_my_pipeline()
# Evaluate the result
llm_response = "Some LLM response I got from my custom LLM pipeline."
response = skill.evaluate(response=llm_response)
print(response)
// print(response)
"validation":
"validation_results": [
"evaluator_name": "Clarity"
"result": "0.5"
"is_valid": "true"
"..."
]
Alternatively, we can just execute the objective.
from root import RootSignals
from root.validators import Validator
# Connect to the Root Signals API
client = RootSignals()
# Create an objective which describes what we are trying to do
objective = client.objectives.create(
intent="Child-safe clear response",
validators=[
Validator(evaluator_name="Clarity", threshold=0.2),
Validator(evaluator_name="Safety for Children", threshold=0.3),
],
)
llm_response = "Some LLM response I got from my custom LLM pipeline."
response = objective.run(response=llm_response)
print(response)
// print(response)
"validation":
"validation_results": [
"evaluator_name": "Clarity"
"result": "0.5"
"is_valid": "true"
"..."
]
Add a model¶
Adding a model is as simple as specifying the model name and an endpoint. The model can be a local model or a model hosted on a cloud service.
from root import RootSignals
# Connect to the Root Signals API
client = RootSignals()
# Add a self-hosted model using Ollama
model = client.models.create(
name="ollama/llama3",
# URL pointing to the model's endpoint. Replace this with your own endpoint.
url="https://d65e-88-148-175-2.ngrok-free.app",
)
# Use the model in a skill
skill = client.skills.create(name="My model test", prompt="Hello, my model!", model="ollama/llama3")
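The added model can then be exercised like any other; a minimal sketch, assuming the Ollama endpoint above is reachable:
# Run the skill against the newly added model and print the raw output.
response = skill.run()
print(response.llm_output)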