Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions src/database/setups.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from sqlalchemy import Connection, text
from sqlalchemy.engine import Row


def get(setup_id: int, connection: Connection) -> Row | None:
row = connection.execute(
text(
"""
SELECT *
FROM algorithm_setup
WHERE sid = :setup_id
""",
),
parameters={"setup_id": setup_id},
)
return row.first()


def get_tags(setup_id: int, connection: Connection) -> list[Row]:
rows = connection.execute(
text(
"""
SELECT *
FROM setup_tag
WHERE id = :setup_id
""",
),
parameters={"setup_id": setup_id},
)
return list(rows.all())


def untag(setup_id: int, tag: str, connection: Connection) -> None:
connection.execute(
text(
"""
DELETE FROM setup_tag
WHERE id = :setup_id AND tag = :tag
""",
),
parameters={"setup_id": setup_id, "tag": tag},
)
2 changes: 2 additions & 0 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from routers.openml.evaluations import router as evaluationmeasures_router
from routers.openml.flows import router as flows_router
from routers.openml.qualities import router as qualities_router
from routers.openml.setups import router as setup_router
from routers.openml.study import router as study_router
from routers.openml.tasks import router as task_router
from routers.openml.tasktype import router as ttype_router
Expand Down Expand Up @@ -55,6 +56,7 @@ def create_api() -> FastAPI:
app.include_router(task_router)
app.include_router(flows_router)
app.include_router(study_router)
app.include_router(setup_router)
return app


Expand Down
14 changes: 13 additions & 1 deletion src/routers/dependencies.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from http import HTTPStatus
from typing import Annotated

from fastapi import Depends
from fastapi import Depends, HTTPException
from pydantic import BaseModel
from sqlalchemy import Connection

Expand Down Expand Up @@ -29,6 +30,17 @@ def fetch_user(
return User.fetch(api_key, user_data) if api_key else None


def fetch_user_or_raise(
user: Annotated[User | None, Depends(fetch_user)] = None,
) -> User:
if user is None:
raise HTTPException(
status_code=HTTPStatus.PRECONDITION_FAILED,
detail={"code": "103", "message": "Authentication failed"},
)
return user


class Pagination(BaseModel):
offset: int = 0
limit: int = 100
20 changes: 8 additions & 12 deletions src/routers/openml/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,13 @@
_format_parquet_url,
)
from database.users import User, UserGroup
from routers.dependencies import Pagination, expdb_connection, fetch_user, userdb_connection
from routers.dependencies import (
Pagination,
expdb_connection,
fetch_user,
fetch_user_or_raise,
userdb_connection,
)
from routers.types import CasualString128, IntegerRange, SystemString64, integer_range_regex
from schemas.datasets.openml import DatasetMetadata, DatasetStatus, Feature, FeatureType

Expand All @@ -32,29 +38,19 @@
def tag_dataset(
data_id: Annotated[int, Body()],
tag: Annotated[str, SystemString64],
user: Annotated[User | None, Depends(fetch_user)] = None,
user: Annotated[User, Depends(fetch_user_or_raise)],
expdb_db: Annotated[Connection, Depends(expdb_connection)] = None,
) -> dict[str, dict[str, Any]]:
tags = database.datasets.get_tags_for(data_id, expdb_db)
if tag.casefold() in [t.casefold() for t in tags]:
raise create_tag_exists_error(data_id, tag)

if user is None:
raise create_authentication_failed_error()

database.datasets.tag(data_id, tag, user_id=user.user_id, connection=expdb_db)
return {
"data_tag": {"id": str(data_id), "tag": [*tags, tag]},
}


def create_authentication_failed_error() -> HTTPException:
return HTTPException(
status_code=HTTPStatus.PRECONDITION_FAILED,
detail={"code": "103", "message": "Authentication failed"},
)


def create_tag_exists_error(data_id: int, tag: str) -> HTTPException:
return HTTPException(
status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
Expand Down
45 changes: 45 additions & 0 deletions src/routers/openml/setups.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from http import HTTPStatus
from typing import Annotated

from fastapi import APIRouter, Body, Depends, HTTPException
from sqlalchemy import Connection

import database.setups
from database.users import User, UserGroup
from routers.dependencies import expdb_connection, fetch_user_or_raise
from routers.types import SystemString64

router = APIRouter(prefix="/setup", tags=["setup"])


@router.post(path="/untag")
def untag_setup(
setup_id: Annotated[int, Body()],
tag: Annotated[str, SystemString64],
user: Annotated[User, Depends(fetch_user_or_raise)],
expdb_db: Annotated[Connection, Depends(expdb_connection)] = None,
) -> dict[str, dict[str, str]]:
if not database.setups.get(setup_id, expdb_db):
raise HTTPException(
status_code=HTTPStatus.PRECONDITION_FAILED,
detail={"code": "472", "message": "Entity not found."},
)

setup_tags = database.setups.get_tags(setup_id, expdb_db)
matched_tag_row = next((t for t in setup_tags if t.tag.casefold() == tag.casefold()), None)

if not matched_tag_row:
raise HTTPException(
status_code=HTTPStatus.PRECONDITION_FAILED,
detail={"code": "475", "message": "Tag not found."},
)

if matched_tag_row.uploader != user.user_id and UserGroup.ADMIN not in user.groups:
raise HTTPException(
status_code=HTTPStatus.PRECONDITION_FAILED,
detail={"code": "476", "message": "Tag is not owned by you"},
)

database.setups.untag(setup_id, matched_tag_row.tag, expdb_db)

return {"setup_untag": {"id": str(setup_id)}}
62 changes: 62 additions & 0 deletions tests/routers/openml/migration/setups_migration_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from http import HTTPStatus

import httpx
import pytest
from starlette.testclient import TestClient

from tests.users import ApiKey


@pytest.mark.parametrize(
"setup_id",
[1, 999999],
ids=["existing setup", "unknown setup"],
)
@pytest.mark.parametrize(
"api_key",
[ApiKey.ADMIN, ApiKey.SOME_USER, ApiKey.OWNER_USER],
ids=["Administrator", "regular user", "possible owner"],
)
Comment on lines +15 to +19
Copy link
Contributor

@coderabbitai coderabbitai bot Feb 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Authentication failure (error code 103) is not covered by migration tests.

ApiKey.INVALID (or an absent key) is excluded from the api_key parametrize list, so the create_authentication_failed_error path in the router is never exercised here. Consider adding ApiKey.INVALID or None as a case to validate parity for the 103 response.

✅ Suggested addition
 `@pytest.mark.parametrize`(
     "api_key",
-    [ApiKey.ADMIN, ApiKey.SOME_USER, ApiKey.OWNER_USER],
-    ids=["Administrator", "regular user", "possible owner"],
+    [ApiKey.ADMIN, ApiKey.SOME_USER, ApiKey.OWNER_USER, ApiKey.INVALID],
+    ids=["Administrator", "regular user", "possible owner", "invalid key"],
 )
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
@pytest.mark.parametrize(
"api_key",
[ApiKey.ADMIN, ApiKey.SOME_USER, ApiKey.OWNER_USER],
ids=["Administrator", "regular user", "possible owner"],
)
`@pytest.mark.parametrize`(
"api_key",
[ApiKey.ADMIN, ApiKey.SOME_USER, ApiKey.OWNER_USER, ApiKey.INVALID],
ids=["Administrator", "regular user", "possible owner", "invalid key"],
)
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@tests/routers/openml/migration/setups_migration_test.py` around lines 15 -
19, The parameterized test for "api_key" omits invalid/missing keys so the
router branch that triggers create_authentication_failed_error (error code 103)
is never exercised; update the pytest.mark.parametrize list for "api_key" to
include ApiKey.INVALID (and/or None) and add corresponding id(s) (e.g., "invalid
key" or "no key") so the test runs the authentication-failure path and asserts
the 103 response in the test that uses ApiKey.ADMIN / ApiKey.SOME_USER /
ApiKey.OWNER_USER currently.

Copy link
Author

@igennova igennova Feb 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I intentionally omitted ApiKey.INVALID from the migration test parameters because the legacy PHP API has a known bug where it returns raw XML instead of JSON when authentication fails for setup queries (Code 103). Because our migration test framework strictly relies on original.json() == new.json() to prove parity, testing the invalid key pathway directly crashes Pytest with a JSONDecodeError.

  • We will be able to test the 103 Authentication failure fully and properly in standard Python unit tests once POST /setup/tag is migrated natively in my follow up PR and we no longer have to rely on php_api to stage the data.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@igennova, thank you for the detailed explanation! That makes perfect sense—if the legacy PHP API returns raw XML for Code 103 errors while the new API returns JSON, the migration test would indeed fail at the original.json() deserialization step before any comparison could happen.

Your plan to test authentication failure properly in standard unit tests once POST /setup/tag is migrated is the right approach. At that point, you won't need php_api to stage the data, and you'll be able to validate the 103 error path cleanly.

One optional suggestion: consider adding a brief comment in the test file or a TODO note explaining why ApiKey.INVALID is excluded from the parameterization, so future maintainers understand the constraint.


✏️ Learnings added
Learnt from: igennova
Repo: openml/server-api PR: 246
File: tests/routers/openml/migration/setups_migration_test.py:15-19
Timestamp: 2026-02-20T21:20:18.622Z
Learning: Migration tests for OpenML API endpoints that rely on php_api for data staging cannot test authentication failure paths (Code 103) when the legacy PHP API returns XML instead of JSON. These paths should be tested in standard Python unit tests after dependent endpoints are migrated.

Note: Learnings are effective only in the context of similar code segments. To apply general coding guidelines, please configure review instructions. You can manage existing learnings in the app.

@pytest.mark.parametrize(
"tag",
["totally_new_tag_for_migration_testing"],
)
def test_setup_untag_response_is_identical(
setup_id: int,
tag: str,
api_key: str,
py_api: TestClient,
php_api: httpx.Client,
) -> None:
if setup_id == 1:
php_api.post(
"/setup/tag",
data={"api_key": ApiKey.SOME_USER, "tag": tag, "setup_id": setup_id},
)

original = php_api.post(
"/setup/untag",
data={"api_key": api_key, "tag": tag, "setup_id": setup_id},
)

if original.status_code == HTTPStatus.OK:
php_api.post(
"/setup/tag",
data={"api_key": ApiKey.SOME_USER, "tag": tag, "setup_id": setup_id},
)

new = py_api.post(
f"/setup/untag?api_key={api_key}",
json={"setup_id": setup_id, "tag": tag},
)

assert original.status_code == new.status_code

if new.status_code != HTTPStatus.OK:
assert original.json()["error"] == new.json()["detail"]
return

original_json = original.json()
new_json = new.json()

assert original_json == new_json
88 changes: 88 additions & 0 deletions tests/routers/openml/setups_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
from collections.abc import Iterator
from http import HTTPStatus

import pytest
from sqlalchemy import Connection, text
from starlette.testclient import TestClient

from tests.users import ApiKey


@pytest.fixture
def mock_setup_tag(expdb_test: Connection) -> Iterator[None]:
expdb_test.execute(
text("DELETE FROM setup_tag WHERE id = 1 AND tag = 'test_unit_tag_123'"),
)
expdb_test.execute(
text("INSERT INTO setup_tag (id, tag, uploader) VALUES (1, 'test_unit_tag_123', 2)")
)
expdb_test.commit()

yield

expdb_test.execute(
text("DELETE FROM setup_tag WHERE id = 1 AND tag = 'test_unit_tag_123'"),
)
expdb_test.commit()


def test_setup_untag_missing_auth(py_api: TestClient) -> None:
response = py_api.post("/setup/untag", json={"setup_id": 1, "tag": "test_tag"})
assert response.status_code == HTTPStatus.PRECONDITION_FAILED
assert response.json()["detail"] == {"code": "103", "message": "Authentication failed"}


def test_setup_untag_unknown_setup(py_api: TestClient) -> None:
response = py_api.post(
f"/setup/untag?api_key={ApiKey.SOME_USER}",
json={"setup_id": 999999, "tag": "test_tag"},
)
assert response.status_code == HTTPStatus.PRECONDITION_FAILED
assert response.json()["detail"] == {"code": "472", "message": "Entity not found."}


def test_setup_untag_tag_not_found(py_api: TestClient) -> None:
response = py_api.post(
f"/setup/untag?api_key={ApiKey.SOME_USER}",
json={"setup_id": 1, "tag": "non_existent_tag_12345"},
)
assert response.status_code == HTTPStatus.PRECONDITION_FAILED
assert response.json()["detail"] == {"code": "475", "message": "Tag not found."}


@pytest.mark.mut
@pytest.mark.usefixtures("mock_setup_tag")
def test_setup_untag_not_owned_by_you(py_api: TestClient) -> None:
response = py_api.post(
f"/setup/untag?api_key={ApiKey.OWNER_USER}",
json={"setup_id": 1, "tag": "test_unit_tag_123"},
)
assert response.status_code == HTTPStatus.PRECONDITION_FAILED
assert response.json()["detail"] == {"code": "476", "message": "Tag is not owned by you"}


@pytest.mark.mut
@pytest.mark.parametrize(
"api_key",
[ApiKey.SOME_USER, ApiKey.ADMIN],
ids=["Owner", "Administrator"],
)
def test_setup_untag_success(api_key: str, py_api: TestClient, expdb_test: Connection) -> None:
expdb_test.execute(text("DELETE FROM setup_tag WHERE id = 1 AND tag = 'test_success_tag'"))
expdb_test.execute(
text("INSERT INTO setup_tag (id, tag, uploader) VALUES (1, 'test_success_tag', 2)")
)
expdb_test.commit()

response = py_api.post(
f"/setup/untag?api_key={api_key}",
json={"setup_id": 1, "tag": "test_success_tag"},
)

assert response.status_code == HTTPStatus.OK
assert response.json() == {"setup_untag": {"id": "1"}}

rows = expdb_test.execute(
text("SELECT * FROM setup_tag WHERE id = 1 AND tag = 'test_success_tag'")
).all()
assert len(rows) == 0