Skip to content

Query Languages Guide

ArcadeDB Python bindings support two approaches:

  1. Pythonic API (Recommended): Use the Java embedded API directly - db.new_document(), vertex.set(), vertex.save()
  2. SQL/OpenCypher: Traditional query languages for complex queries and analytics

Best Practice: Use Pythonic API for CRUD

For creating, updating, and managing data, use the embedded API methods instead of SQL:

Creating Documents

# Create schema
db.schema.create_document_type("Task")
db.schema.create_property("Task", "title", "STRING")
db.schema.create_property("Task", "completed", "BOOLEAN")

# Insert using Pythonic API (RECOMMENDED)
with db.transaction():
    task = db.new_document("Task")
    task.set("title", "Buy groceries")
    task.set("completed", False)
    task.set("tags", ["shopping", "urgent"])
    task.save()

Creating Vertices

# Create schema
db.schema.create_vertex_type("Person")
db.schema.create_edge_type("Knows")

# Insert using Pythonic API (RECOMMENDED)
with db.transaction():
    alice = db.new_vertex("Person")
    alice.set("name", "Alice")
    alice.set("age", 30)
    alice.save()

    bob = db.new_vertex("Person")
    bob.set("name", "Bob")
    bob.save()

Bulk Inserts (preferred: chunked transactions)

# Efficient bulk insertion (embedded-friendly): use chunked transactions
chunk_size = 500
for start in range(0, len(people_data), chunk_size):
    with db.transaction():
        for name, age, city in people_data[start : start + chunk_size]:
            person = db.new_vertex("Person")
            person.set("name", name)
            person.set("age", age)
            person.set("city", city)
            person.save()

# If you specifically need the Java batching API (e.g., remote/server workflows),
# you can still use db.batch_context(...), but embedded users should prefer
# explicit chunked transactions to avoid batch_context overhead.

SQL for Queries

Use SQL for reading and complex analytics: graph traversal extensions.

Basic Queries

# Count using efficient method (RECOMMENDED)
count = db.count_type("Person")

# Query all with ordering
result = db.query("sql", "SELECT FROM Person ORDER BY name")
for person in result:
    print(person.get("name"))

# Query with WHERE
result = db.query("sql", "SELECT FROM Task WHERE priority = 'high' AND completed = false")
tasks = result.to_list()
for task in tasks:
    title = task["title"]
    tags = task["tags"]  # Automatically Python list
    print(f"{title}: {', '.join(tags)}")

# NULL checks
result = db.query(
    "sql",
    "SELECT name, phone, verified FROM Person WHERE email IS NULL"
)

Pagination

Use @rid-based pagination for best performance:

# RECOMMENDED: @rid-based pagination (fastest method)
last_rid = "#-1:-1"  # Start from beginning
batch_size = 1000

while True:
    # Query with @rid > last_rid for efficient pagination
    query = f"""
        SELECT *, @rid as rid FROM User
        WHERE @rid > {last_rid}
        LIMIT {batch_size}
    """
    chunk = list(db.query("sql", query))

    if not chunk:
        break  # No more records

    # Process batch
    for user in chunk:
        user_id = user.get("Id")
        name = user.get("DisplayName")
        # Process user...

    # Update cursor to last record's @rid
    last_rid = chunk[-1].get("rid")

# Alternative: OFFSET-based pagination (slower, not recommended for large datasets)
page = 0
page_size = 100
result = db.query("sql", f"SELECT FROM User LIMIT {page_size} SKIP {page * page_size}")

Parameters

Always use parameters to prevent SQL injection:

# Named parameters (recommended)
result = db.query(
    "sql",
    "SELECT FROM Person WHERE name = :name AND age > :min_age",
    {"name": "Alice", "min_age": 25}
)

# Positional parameters
result = db.query(
    "sql",
    "SELECT FROM Person WHERE name = ? AND age > ?",
    ["Alice", 25]
)

Updating Data

Prefer Pythonic API for updates:

# RECOMMENDED: Update using Pythonic API with .modify()
result = db.query("sql", "SELECT FROM Movie WHERE movieId = '1'")
with db.transaction():
    for movie_result in result:
        movie = movie_result.get_vertex()  # or .get_document()
        mutable_vertex = movie.modify()  # Get mutable version
        mutable_vertex.set("embedding", java_embedding)
        mutable_vertex.set("vector_id", "movie_1")
        mutable_vertex.save()

# Alternative: SQL UPDATE (for bulk operations)
with db.transaction():
    db.command("sql", """
        UPDATE Task SET completed = true, cost = 127.50
        WHERE title = 'Buy groceries'
    """)

Graph Traversal

# Find friends using MATCH
result = db.query(
    "sql",
    """
    MATCH {type: Person, as: alice, where: (name = 'Alice Johnson')}
          -FRIEND_OF->
          {type: Person, as: friend}
    RETURN friend.name as name, friend.city as city
    ORDER BY friend.name
""",
)

friends = result.to_list()
for friend in friends:
    print(f"{friend['name']} from {friend['city']}")

# Friends of friends (2 degrees)
result = db.query(
    "sql",
    """
    MATCH {type: Person, as: alice, where: (name = 'Alice Johnson')}
          -FRIEND_OF->
          {type: Person, as: friend}
          -FRIEND_OF->
          {type: Person, as: friend_of_friend, where: (name <> 'Alice Johnson')}
    RETURN DISTINCT friend_of_friend.name as name, friend.name as through_friend
    ORDER BY friend_of_friend.name
""",
)

for row in result:
    name = row.get("name")
    through = row.get("through_friend")
    print(f"{name} (through {through})")

Aggregations

# Count with first()
result = db.query("sql", "SELECT count(*) as count FROM Test")
count = result.first().get("count")

# Group by with statistics
result = db.query(
    "sql",
    """
    SELECT city, COUNT(*) as person_count,
           AVG(age) as avg_age
    FROM Person
    GROUP BY city
    ORDER BY person_count DESC, city
""",
)

for row in result:
    city = row.get("city")  # Python str
    count = row.get("person_count")  # Python int
    avg_age = row.get("avg_age")  # Python float
    print(f"{city}: {count} people, avg age {avg_age:.1f}")

ResultSet Methods

# first() - get first result
result = db.query("sql", "SELECT FROM Person ORDER BY name")
first_person = result.first()
assert first_person.get("name") == "Alice"

# to_list() - convert all to list
result2 = db.query("sql", "SELECT FROM Person ORDER BY name")
people_list = result2.to_list()
assert len(people_list) == 2
assert people_list[1]["name"] == "Bob"

# first() to check if results exist
result = db.query("sql", "SELECT FROM Person WHERE name = 'Unknown'")
first_mutual = result.first()
if first_mutual:
    print(f"Found: {first_mutual.get('name')}")
else:
    print("No results found")

OpenCypher

OpenCypher provides expressive graph pattern matching.

Basic Traversals

# Get vertex property values
result = db.query("opencypher", "MATCH (p:Person) RETURN p.name as name")
names = [record.get("name") for record in result]
assert "Alice" in names or "Bob" in names

# Count vertices
result = db.query("opencypher", "MATCH (p:Person) RETURN count(p) as count")
results = list(result)
count = results[0].get("count") if results else 0

Graph Traversals

# Complex projection with aggregation
query = """
    MATCH (q:Question)
    OPTIONAL MATCH (q)-[:HAS_ANSWER]->(a:Answer)
    RETURN q.Title as title, count(a) as answer_count, q.Score as score
    ORDER BY answer_count DESC
    LIMIT 5
"""

results = list(db.query("opencypher", query))

for i, result in enumerate(results, 1):
    title = result.get("title") or "Unknown"
    answer_count = result.get("answer_count") or 0
    score = result.get("score") or 0
    print(f"[{i}] Answers: {answer_count}, Score: {score}")
    print(f"    {title[:70]}...")

Processing Results

# Simple value extraction
result = db.query("opencypher", "MATCH (p:Person) RETURN p.name as name")
names = [record.get("name") for record in result]

# Project returns named keys directly
query = """
    MATCH (q:Question)
    RETURN q.Title as title, q.Score as score
    LIMIT 5
"""
results = list(db.query("opencypher", query))
for result in results:
    title = result.get("title")
    score = result.get("score")