SQLAlchemy Group By With Full Child Objects

In this article, we will explore how to use SQLAlchemy Group By With Full Child Objects in Python. SQLAlchemy provides several techniques to achieve SQLAlchemy Group By with Full Child Objects.

SQLAlchemy Group By With Full Child Objects

In this section, we are making a connection with the database, we are creating the structure of two tables with parent-child relationships. Also, we are inserting data in both tables.

Python3

from sqlalchemy import *
from sqlalchemy.orm import sessionmaker, relationship
from sqlalchemy.ext.declarative import declarative_base

# create Base class
Base = declarative_base()

# Establish connection
engine = create_engine("mysql+pymysql://userName:password@host/dbName")

# model class


class Department(Base):
    __tablename__ = "department"
    id = Column(Integer, primary_key=True)
    name = Column(String(50))

    # the argument should be  Class name not table name
    employees = relationship('Employee')


class Employee(Base):
    __tablename__ = "employee"
    emp_id = Column(Integer, primary_key=True)
    emp_name = Column(String(50))
    age = Column(Integer)
    salary = Column(DECIMAL)

    # dep_id is just to define the foreign key
    dep_id = Column(Integer, ForeignKey("department.id"))
    department = relationship("Department")


# creating tables in DB,if tables alredy creatd no need of these statement
Base.metadata.create_all(engine)


# creating session
Session = sessionmaker(bind=engine)
session = Session()


# creating instances of department class
ece_dep = Department(id=1, name="ECE")
cse_dep = Department(id=2, name="CSE")
mech_dep = Department(id=3, name="MECH")

# ece employees
emp1 = Employee(emp_id=101, emp_name="Kohli",  age=34,
                salary=75000,  dep_id=ece_dep.id)
emp2 = Employee(emp_id=102, emp_name="Dhoni",  age=45,
                salary=120000, dep_id=ece_dep.id)
emp3 = Employee(emp_id=103, emp_name="Sachine", age=39,
                salary=100000, dep_id=ece_dep.id)

# cse employess
emp4 = Employee(emp_id=201, emp_name="Alice",   age=26,
                salary=40000, dep_id=cse_dep.id)
emp5 = Employee(emp_id=202, emp_name="Bob",     age=56,
                salary=75000, dep_id=cse_dep.id)
emp6 = Employee(emp_id=203, emp_name="charlie", age=43,
                salary=50000, dep_id=cse_dep.id)


# mech employees
emp7 = Employee(emp_id=301, emp_name="Ronaldo", age=56,
                salary=40000, dep_id=mech_dep.id)
emp8 = Employee(emp_id=302, emp_name="Messi",   age=54,
                salary=30000, dep_id=mech_dep.id)
emp9 = Employee(emp_id=303, emp_name="neymar",  age=45,
                salary=25000, dep_id=mech_dep.id)

# adding instances to session
session.add_all([ece_dep, cse_dep, mech_dep, emp1, emp2,
                 emp3, emp4, emp5, emp6, emp7, emp8, emp9])

# committing the changes
session.commit()

# closing the connection
session.close()

Output

Joinedload Technique

Joinedload technique in SQLAlchemy is used to explicitly instruct the ORM to perform a join query to load related objects along with the primary object. In SQLAlchemy, you can enable joined-load loading using the options() and joinedload() methods.

First, you define a subquery to calculates the count of employees per department using the session.query() method and specify the necessary columns(Department.id,Department.name) and aggregations(func.count(Employee.emp_id)).

session.query(Department,func.count(Employee.emp_id).label("employee_count"))\
    .join(Department.employees)\
    .group_by(Department)\
    .options(joinedload(Department.employees))\
    .all()

Then use join(Employess) to sepcify that the query should perform an join between the Departmant table and Employee table.

.join(Department.employees)

We specify the group_by() clause to group the results by the Department entity.

.group_by(Department)

Then, we use the options() method on the query object to specify additional query options, such as joinedload loading of relationships.
We pass the joinedload() method as argument to options().
Inside the joinedload() method, we specify the relationship Department.employees to load. indicating that we want to load the employee's relationship of the Department model.

.options(joinedload(Department.employees))

finally, we use all() to retrieve the data.

.all()

Code Implementation

Python3

from sqlalchemy import *
from sqlalchemy.orm import sessionmaker, relationship
from sqlalchemy.orm import joinedload
from sqlalchemy.ext.declarative import declarative_base

# create Base class
Base = declarative_base()

# Establish connection
engine = create_engine("mysql+pymysql://user:password@host/dbName")

# model class
class Department(Base):
    __tablename__ = "department"
    id = Column(Integer, primary_key=True)
    name = Column(String(50))
    
    #the argument should be  Class name not table name
    employees = relationship('Employee')


class Employee(Base):
    __tablename__ = "employee"
    emp_id = Column(Integer, primary_key=True)
    emp_name = Column(String(50))
    age=Column(Integer)
    salary=Column(DECIMAL)

    #dep_id is just to define the foreign key 
    dep_id = Column(Integer, ForeignKey("department.id"))
    department = relationship("Department")


# creating tables in DB,if tables alredy 
# creatd no need of these statement
Base.metadata.create_all(engine)


# creating session
Session = sessionmaker(bind=engine)
session = Session()

#query
joinloadQuery =session.query(Department,
          func.count(Employee.emp_id).label("count"))\
    .join(Department.employees)\
    .group_by(Department)\
    .options(joinedload(Department.employees))

#execuing query with DB and fetching results
result=joinloadQuery.all()

print("Joinload technique result")
print("Dep Count")
for dep,count in result:
    print(dep.name,count)

# closing the connection
session.close()

Output

Contains Eager Loading Technique

contains eager technique in SQLAlchemy is used to indicate that a query should eagerly load a specific relationship and include it in the result set. It is particularly useful when you have loaded a relationship using a separate query or a different loading technique, and you want to include those related objects in the result set of the current query.

First, you define a subquery to calculates the sum of salaries of employees per department using the session.query() method and specify the necessary columns(Department.id,Department.name) and aggregations(func.sum(Employee.salary)).

query = session.query(Department, func.sum(Employee.salary)) \
        .join(Employee) \
        .group_by(Department.name) \
        .options(contains_eager(Department.employees)) \
        .all()

Then use join(Employess) to specify that the query should perform an join between the Department table and Employee table.

.join(Employee)

We specify the group_by() clause to group the results by the Department entity.

.group_by(Department.name)

Then, we use the options() method on the query object to specify additional query options, such as contains_eager loading of relationships.
we pass the contains_eager() method as argument to options().
Inside the contains_eager() method, we specify the relationship Department.employees to load. indicating that we want to load the employee's relationship of the Department model.

.options(contains_eager(Department.employees))

Finally, we use all() to retrieve the data

.all()

Code Implemention

Python3

from sqlalchemy.orm import contains_eager

# query
Query = session.query(Department, 
        func.sum(Employee.salary).label("salary"))\
    .join(Employee)\
    .group_by(Department.name)\
    .options(contains_eager(Department.employees))

# executing the Query with DB
result = Query.all()

print("contains_eager technique result")
print("Dep salarySum")
for dep, salary in result:
    print(dep.name, salary)

Output

Subquery Technique

We first define a subquery that calculates the count of employees per department. The subquery is created using session.query() with the necessary columns and aggregations, followed by a join() with the Employee table and a group_by() on the Department.id column. The subquery() at the end converts the query into a subquery object.

session.query(Department.id, func.count(Employee.emp_id).label('employee_count'))
    .join(Employee)
    .group_by(Department.id)
    .subquery()

First, you define a subquery to calculates the count of employees per department using the session.query() method and specify the necessary columns(Department.id,Department.name) and aggregations(func.count(Employee.emp_id)).

.join(Employee)

Then use join(Employess) to specify that the query should perform a join between the Department table and Employee table.

.group_by(Department.id)

Then we use group_by() method is used to group the results by the id column of the Department.

.subquery()

Finally we use .subquery() it will convert the query object to subquery object

Code

Python3

#subquery
subQuery=session\
    .query(Department.id, func.count(Employee.emp_id)\
    .label('employee_count'))\
    .join(Employee)\
    .group_by(Department.id)\
    .subquery()

#main query
mainQuery=session.query(Department,subQuery.c.employee_count)\
        .join(subQuery,Department.id == subQuery.c.id).all()

print("Subquery Technique")
for dep in mainQuery:
    print(dep[0].name,dep[1])

session.close()

Output :

Load Technique

By using the Load technique, you can fine-tune how SQLAlchemy fetches related objects, optimizing performance and reducing the number of queries issued to the database.

query = session.query(Department.name, func.avg(Employee.salary)) 
               .join(Employee, Employee.dep_id == Department.id) 
               .group_by(Department.name) 
               .options(Load(Employee))
               .all()

session.query(Department.name, func.avg(Employee.salary)): We start by creating a query object that selects the name attribute from the Department model and calculates the average salary using the func.avg() function on the salary attribute from the Employee model.
join(Employee, Employee.dep_id == Department.id): We use the join() method to join the Employee table with the Department table with joining condition.
group_by(Department.name): We group the results by the name attribute of the Department model. This ensures that we get the average salary per department.
options(Load(Employee)): We specify the loading options for the Employee model using the Load() technique. By passing Load(Employee), we indicate that we want to customize the loading behavior for the Employee objects in the query.
all(): We execute the query and retrieve all the results.

Code

Python3

from sqlalchemy.orm import Load

# Query to calculate the average salary of employees in each department
query = session.query(Department.name, func.avg(Employee.salary)) \
               .join(Employee, Employee.dep_id == Department.id) \
               .group_by(Department.name) \
               .options(Load(Employee))\
               .all()
print("Load technique result")
print("Dep averageSalary")
for depName, salary in query:
    print(depName, salary)

# closing the connection
session.close()

Output

Load technique result
Dep averageSalary
ECE 98333.3333
CSE 55000.0000
MECH 31666.6667

Selectin Load Technique

The selectinload() method allows you to load related objects in a separate SELECT statement rather than joining them with the main query. This can be useful when you have a large number of related objects or when the relationship involves many-to-many or one-to-many associations.

Python3

from sqlalchemy.orm import selectinload

query = session.query(Department, func.min(Employee.salary)) \
               .join(Employee) \
               .group_by(Department.name) \
               .options(selectinload(Department.employees))

result = query.all()

for dep,salary in result:
    print(dep.name,salary)

session.close()

Output

Immediate Loading Technique

Immediate load is a technique in SQLAlchemy that loads related objects immediately along with the parent object when the query is executed. It allows you to retrieve all the necessary data in a single query, reducing the number of database round-trips. It can be achieved by using the immediateload() in options().

Python3

result = session.query(Department, 
         func.avg(Employee.age).label("averageAge"))\
               .join(Employee)\
               .group_by(Department.name)\
               .options(immediateload(Department.employees))\
               .all()

print("contains_eager technique result")
print("Dep averageAge")
for dep ,age in result:
    print(dep.name,age)

# closing the connection
session.close()

Output

Raise Load Technique

The raiseload() method is used to specify that a particular relationship should be loaded using a separate SQL query when accessed.

Python3

from sqlalchemy.orm import raiseload

# query to get the minimum age in each department
query = session.query(Department, func.min(Employee.age)) \
               .join(Employee) \
               .group_by(Department.name) \
               .options(raiseload(Department.employees))

result = query.all()

print("Raise Lad technique")
print("Name MinAge")
for dep, age in result:
    print(dep.name, age)

session.close()

Output

Noload Technique

The noload technique is used to specify that a particular relationship should not be loaded at all. It is useful when you want to exclude a specific relationship from being loaded in a query, even if it has a default loading strategy.

Python3

from sqlalchemy.orm import noload

query = session.query(Department, func.max(Employee.salary).label("maxSalary")) \
               .join(Employee,Department.id==Employee.dep_id) \
               .group_by(Department.id, Department.name) \
               .options(noload(Department.employees))\
               .all()
print("noload technique result")
print("Dep maxSalary")
for dep ,salary in query:
    print(dep.name,salary)

# closing the connection
session.close()

Output

Dep maxSalary
ECE 120000
CSE 75000
MECH 40000

Lazy Loading Technique

We can simply achieve the same functionality by using lazy loading with the help of foreign key relations no need for joins and group_by clauses.

Python3

departments = session.query(Department).all()
for department in departments:
    # Accessing the employees attribute triggers lazy loading
    employee_count = len(department.employees)
    print(f"Department: {department.name}, Employee Count: {employee_count}")
session.close()

Output

Defaultload Technique

The defaultload technique in SQLAlchemy is used to control the loading behavior of relationships when querying objects from the database. By default, SQLAlchemy loads all related objects for a given relationship when the parent object is queried. However, in some cases, loading all related objects upfront can lead to unnecessary performance overhead.

Python3

from sqlalchemy import *
from sqlalchemy.orm import sessionmaker, relationship
from sqlalchemy.orm import defaultload
from sqlalchemy.ext.declarative import declarative_base
# Create Base class
Base = declarative_base()
# Establish connection
engine = create_engine("postgresql://postgres:root@localhost/test")
# Model classes
class Department(Base):
    __tablename__ = "department"
    id = Column(Integer, primary_key=True)
    name = Column(String(50))
    employees = relationship('Employee',
                back_populates="department")
class Employee(Base):
    __tablename__ = "employee"
    emp_id = Column(Integer, primary_key=True)
    emp_name = Column(String(50))
    age = Column(Integer)
    salary = Column(DECIMAL)
    dep_id = Column(Integer, ForeignKey("department.id"))
    department = relationship(
        "Department", back_populates="employees", 
                          overlaps="employees")
Base.metadata.create_all(engine)

Session = sessionmaker(bind=engine)
session = Session()
# Query to calculate the max age of employees in each department
query = session.query(Department, func.max(Employee.age)) \
               .join(Employee) \
               .group_by(Department.id, Department.name) \
               .options(defaultload(Department.employees))
result = query.all()
print("Default Loading")
print("dep Age")
for dep, max_age in result:
    print(dep.name, max_age)
session.close()

Output

SQLAlchemy Group By With Full Child Objects