Query Languages Guide¶
ArcadeDB Python bindings support two approaches:
- SQL/OpenCypher (Recommended): Use DSL for schema, CRUD, and graph operations
- Language Choice: Use SQL for relational-style operations and OpenCypher for graph traversals
Best Practice: Use DSL for CRUD¶
For creating, updating, and managing data, prefer SQL/OpenCypher statements:
Creating Documents¶
# Create schema
db.command("sql", "CREATE DOCUMENT TYPE Task")
db.command("sql", "CREATE PROPERTY Task.title STRING")
db.command("sql", "CREATE PROPERTY Task.completed BOOLEAN")
# Insert using SQL (RECOMMENDED)
with db.transaction():
db.command(
"sql",
"INSERT INTO Task SET title = ?, completed = ?, tags = ?",
"Buy groceries",
False,
["shopping", "urgent"],
)
Creating Vertices¶
# Create schema
db.command("sql", "CREATE VERTEX TYPE Person")
db.command("sql", "CREATE EDGE TYPE Knows UNIDIRECTIONAL")
# Insert using SQL (RECOMMENDED)
with db.transaction():
db.command("sql", "INSERT INTO Person SET name = ?, age = ?", "Alice", 30)
db.command("sql", "INSERT INTO Person SET name = ?, age = ?", "Bob", 25)
Bulk Inserts (preferred: chunked transactions)¶
# Efficient bulk insertion (embedded-friendly): use chunked SQL transactions
chunk_size = 500
for start in range(0, len(people_data), chunk_size):
with db.transaction():
for name, age, city in people_data[start : start + chunk_size]:
db.command(
"sql",
"INSERT INTO Person SET name = ?, age = ?, city = ?",
name,
age,
city,
)
# If you specifically need wrapper-level Java batching APIs,
# you can still use db.batch_context(...), but DSL remains the default path.
SQL for Queries¶
Basic Queries¶
# Count using efficient method (RECOMMENDED)
count = db.count_type("Person")
# Query all with ordering
result = db.query("sql", "SELECT FROM Person ORDER BY name")
for person in result:
print(person.get("name"))
# Query with WHERE
result = db.query("sql", "SELECT FROM Task WHERE priority = 'high' AND completed = false")
tasks = result.to_list()
for task in tasks:
title = task["title"]
tags = task["tags"] # Automatically Python list
print(f"{title}: {', '.join(tags)}")
# NULL checks
result = db.query(
"sql",
"SELECT name, phone, verified FROM Person WHERE email IS NULL"
)
Pagination¶
Use @rid-based pagination for best performance:
# RECOMMENDED: @rid-based pagination (fastest method)
last_rid = "#-1:-1" # Start from beginning
batch_size = 1000
while True:
# Query with @rid > last_rid for efficient pagination
query = f"""
SELECT *, @rid as rid FROM User
WHERE @rid > {last_rid}
LIMIT {batch_size}
"""
chunk = list(db.query("sql", query))
if not chunk:
break # No more records
# Process batch
for user in chunk:
user_id = user.get("Id")
name = user.get("DisplayName")
# Process user...
# Update cursor to last record's @rid
last_rid = chunk[-1].get("rid")
# Alternative: OFFSET-based pagination (slower, not recommended for large datasets)
page = 0
page_size = 100
result = db.query("sql", f"SELECT FROM User LIMIT {page_size} SKIP {page * page_size}")
Parameters¶
Always use parameters to prevent SQL injection:
# Named parameters (recommended)
result = db.query(
"sql",
"SELECT FROM Person WHERE name = :name AND age > :min_age",
{"name": "Alice", "min_age": 25}
)
# Positional parameters
result = db.query(
"sql",
"SELECT FROM Person WHERE name = ? AND age > ?",
["Alice", 25]
)
SQLScript (multi-statement)¶
Use sqlscript to run multiple statements in one call. When there is no
explicit RETURN, the result set contains the last executed statement
(including DDL such as CREATE/ALTER).
script = """
CREATE VERTEX TYPE SqlScriptVertex;
INSERT INTO SqlScriptVertex SET name = 'test';
ALTER TYPE SqlScriptVertex ALIASES ss;
"""
result = db.command("sqlscript", script)
last = result.first()
assert last.get("operation") == "ALTER TYPE"
assert last.get("typeName") == "SqlScriptVertex"
Updating Data¶
Prefer SQL for updates:
# RECOMMENDED: SQL UPDATE
with db.transaction():
db.command("sql", """
UPDATE Movie
SET embedding = :embedding, vector_id = :vector_id
WHERE movieId = :movie_id
""", {
"embedding": java_embedding,
"vector_id": "movie_1",
"movie_id": "1"
})
# SQL UPDATE is also ideal for bulk operations
with db.transaction():
db.command("sql", """
UPDATE Task SET completed = true, cost = 127.50
WHERE title = 'Buy groceries'
""")
Update with JSON array content¶
ArcadeDB supports UPDATE ... CONTENT with JSON arrays to update multiple
documents in one statement.
with db.transaction():
db.command(
"sql",
"""
INSERT INTO JsonArrayDoc CONTENT
[{"name":"tim"},{"name":"tom"}]
""",
)
with db.transaction():
inserted = db.query("sql", "SELECT @rid, name FROM JsonArrayDoc").to_list()
update_content = ", ".join(
f"{{@rid:'{row['@rid']}',name:'{row['name']}',status:'updated'}}"
for row in inserted
)
result = db.command(
"sql",
f"UPDATE JsonArrayDoc CONTENT [{update_content}] RETURN AFTER",
)
rows = result.to_list()
assert {row["status"] for row in rows} == {"updated"}
TRUNCATE BUCKET¶
Use TRUNCATE BUCKET to quickly delete all records in a bucket. This is a
low-level operation; prefer DELETE FROM <Type> unless you specifically need
bucket-level maintenance.
db.command("sql", "CREATE DOCUMENT TYPE BucketDoc BUCKETS 1")
bucket_info = db.query(
"sql",
"SELECT name FROM schema:buckets WHERE type = 'BucketDoc' LIMIT 1",
).first()
bucket_name = bucket_info.get("name")
with db.transaction():
db.command("sql", f"TRUNCATE BUCKET {bucket_name}")
Graph Traversal¶
# Find friends using MATCH
result = db.query(
"sql",
"""
MATCH {type: Person, as: alice, where: (name = 'Alice Johnson')}
-FRIEND_OF->
{type: Person, as: friend}
RETURN friend.name as name, friend.city as city
ORDER BY friend.name
""",
)
friends = result.to_list()
for friend in friends:
print(f"{friend['name']} from {friend['city']}")
# Friends of friends (2 degrees)
result = db.query(
"sql",
"""
MATCH {type: Person, as: alice, where: (name = 'Alice Johnson')}
-FRIEND_OF->
{type: Person, as: friend}
-FRIEND_OF->
{type: Person, as: friend_of_friend, where: (name <> 'Alice Johnson')}
RETURN DISTINCT friend_of_friend.name as name, friend.name as through_friend
ORDER BY friend_of_friend.name
""",
)
for row in result:
name = row.get("name")
through = row.get("through_friend")
print(f"{name} (through {through})")
Aggregations¶
# Count with first()
result = db.query("sql", "SELECT count(*) as count FROM Test")
count = result.first().get("count")
# Group by with statistics
result = db.query(
"sql",
"""
SELECT city, COUNT(*) as person_count,
AVG(age) as avg_age
FROM Person
GROUP BY city
ORDER BY person_count DESC, city
""",
)
for row in result:
city = row.get("city") # Python str
count = row.get("person_count") # Python int
avg_age = row.get("avg_age") # Python float
print(f"{city}: {count} people, avg age {avg_age:.1f}")
Full-text search ($score)¶
When using full-text indexes, ArcadeDB exposes a $score variable that you can
select and order by.
# Create full-text index
db.command("sql", "CREATE DOCUMENT TYPE Article")
db.command("sql", "CREATE PROPERTY Article.content STRING")
db.command("sql", "CREATE INDEX ON Article (content) FULL_TEXT")
# Query with SEARCH_FIELDS and $score
result = db.query(
"sql",
"SELECT content, $score FROM Article WHERE SEARCH_FIELDS(['content'], 'database') = true ORDER BY $score DESC",
)
for row in result:
print(row.get("content"), row.get("$score"))
ResultSet Methods¶
# first() - get first result
result = db.query("sql", "SELECT FROM Person ORDER BY name")
first_person = result.first()
assert first_person.get("name") == "Alice"
# to_list() - convert all to list
result2 = db.query("sql", "SELECT FROM Person ORDER BY name")
people_list = result2.to_list()
assert len(people_list) == 2
assert people_list[1]["name"] == "Bob"
# first() to check if results exist
result = db.query("sql", "SELECT FROM Person WHERE name = 'Unknown'")
first_mutual = result.first()
if first_mutual:
print(f"Found: {first_mutual.get('name')}")
else:
print("No results found")
OpenCypher¶
OpenCypher provides expressive graph pattern matching.
Basic Traversals¶
# Get vertex property values
result = db.query("opencypher", "MATCH (p:Person) RETURN p.name as name")
names = [record.get("name") for record in result]
assert "Alice" in names or "Bob" in names
# Count vertices
result = db.query("opencypher", "MATCH (p:Person) RETURN count(p) as count")
results = list(result)
count = results[0].get("count") if results else 0
Graph Traversals¶
# Complex projection with aggregation
query = """
MATCH (q:Question)
OPTIONAL MATCH (q)-[:HAS_ANSWER]->(a:Answer)
RETURN q.Title as title, count(a) as answer_count, q.Score as score
ORDER BY answer_count DESC
LIMIT 5
"""
results = list(db.query("opencypher", query))
for i, result in enumerate(results, 1):
title = result.get("title") or "Unknown"
answer_count = result.get("answer_count") or 0
score = result.get("score") or 0
print(f"[{i}] Answers: {answer_count}, Score: {score}")
print(f" {title[:70]}...")
Processing Results¶
# Simple value extraction
result = db.query("opencypher", "MATCH (p:Person) RETURN p.name as name")
names = [record.get("name") for record in result]
# Project returns named keys directly
query = """
MATCH (q:Question)
RETURN q.Title as title, q.Score as score
LIMIT 5
"""
results = list(db.query("opencypher", query))
for result in results:
title = result.get("title")
score = result.get("score")