Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add dagrun state object #1507

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion airflow/jobs/scheduler_job_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -1347,7 +1347,8 @@ def _start_queued_dagruns(self, session: Session) -> None:
)

def _update_state(dag: DAG, dag_run: DagRun):
dag_run.state = DagRunState.RUNNING
# dag_run.state = DagRunState.RUNNING
dag_run.set_state(DagRunState.RUNNING, session=session)
dag_run.start_date = timezone.utcnow()
if dag.timetable.periodic and not dag_run.external_trigger and dag_run.clear_number < 1:
# TODO: Logically, this should be DagRunInfo.run_after, but the
Expand Down
58 changes: 58 additions & 0 deletions airflow/migrations/versions/0135_2_9_0_add_state_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""Add state table

Revision ID: 41729c6bd933
Revises: 1fd565369930
Create Date: 2024-03-05 00:01:45.086985

"""

import sqlalchemy as sa
from alembic import op

from airflow.migrations.db_types import StringID

# revision identifiers, used by Alembic.
# ``revision`` is this migration's own ID and ``down_revision`` is the ID of
# the migration it revises; both match the "Revision ID" / "Revises" lines in
# the module docstring above.
revision = '41729c6bd933'
down_revision = '1fd565369930'
branch_labels = None
depends_on = None
# NOTE(review): presumably the Airflow release this migration ships with — confirm.
airflow_version = '2.9.0'


def upgrade():
    """Create the ``dagrun_state`` table."""
    # Column definitions, in the order they appear in the created table.
    columns = [
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("type", StringID(), nullable=False),
        sa.Column("name", sa.String(20), nullable=False),
        sa.Column("message", StringID(), nullable=True),
        sa.Column("timestamp", sa.DateTime()),
        sa.Column("task_instance_id", sa.Integer(), nullable=True),
        sa.Column("dag_run_id", sa.Integer(), nullable=True),
    ]
    # Table-level constraints: primary key on ``id`` and a foreign key from
    # ``dag_run_id`` to ``dag_run.id``.
    constraints = [
        sa.PrimaryKeyConstraint("id"),
        sa.ForeignKeyConstraint(["dag_run_id"], ["dag_run.id"]),
    ]
    op.create_table("dagrun_state", *columns, *constraints)


def downgrade():
    """Unapply Add state table by dropping ``dagrun_state``."""
    # The table name must match the one created in upgrade(); the previous
    # value ("base_state") named a table this migration never creates, so the
    # downgrade could not undo the upgrade.
    op.drop_table("dagrun_state")
2 changes: 2 additions & 0 deletions airflow/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ def __getattr__(name):
"Variable": "airflow.models.variable",
"XCom": "airflow.models.xcom",
"clear_task_instances": "airflow.models.taskinstance",
"DagRunStateModel": "airflow.models.state",
}

if TYPE_CHECKING:
Expand All @@ -133,6 +134,7 @@ def __getattr__(name):
from airflow.models.renderedtifields import RenderedTaskInstanceFields
from airflow.models.skipmixin import SkipMixin
from airflow.models.slamiss import SlaMiss
from airflow.models.state import DagRunStateModel
from airflow.models.taskfail import TaskFail
from airflow.models.taskinstance import TaskInstance, clear_task_instances
from airflow.models.taskreschedule import TaskReschedule
Expand Down
28 changes: 23 additions & 5 deletions airflow/models/dagrun.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
from airflow.models.abstractoperator import NotMapped
from airflow.models.base import Base, StringID
from airflow.models.expandinput import NotFullyPopulated
from airflow.models.state import DagRunStateModel
from airflow.models.taskinstance import TaskInstance as TI
from airflow.models.tasklog import LogTemplate
from airflow.stats import Stats
Expand Down Expand Up @@ -122,6 +123,9 @@ class DagRun(Base, LoggingMixin):
execution_date = Column(UtcDateTime, default=timezone.utcnow, nullable=False)
start_date = Column(UtcDateTime)
end_date = Column(UtcDateTime)
dag_run_states = relationship(
"DagRunStateModel", back_populates="dag_run", cascade="all, delete, delete-orphan"
)
_state = Column("state", String(50), default=DagRunState.QUEUED)
run_id = Column(StringID(), nullable=False)
creating_job_id = Column(Integer)
Expand Down Expand Up @@ -230,6 +234,9 @@ def __init__(
self.conf = conf or {}
if state is not None:
self.state = state
self.dag_run_states.append(
DagRunStateModel.add_state(type=state, name=state, message="Dagrun created", dagrun=self)
)
if queued_at is NOTSET:
self.queued_at = timezone.utcnow() if state == DagRunState.QUEUED else None
else:
Expand Down Expand Up @@ -268,7 +275,8 @@ def logical_date(self) -> datetime:
def get_state(self):
return self._state

def set_state(self, state: DagRunState) -> None:
@provide_session
def set_state(self, state: DagRunState, session) -> None:
"""Change the state of the DagRan.

Changes to attributes are implemented in accordance with the following table
Expand Down Expand Up @@ -322,6 +330,16 @@ def set_state(self, state: DagRunState) -> None:
"""
if state not in State.dag_states:
raise ValueError(f"invalid DagRun state: {state}")
if (
not session.query(DagRunStateModel)
.filter(DagRunStateModel.type == state, DagRunStateModel.dag_run_id == self.id)
.count()
):
self.dag_run_states.append(
DagRunStateModel.add_state(
type=state, name=state, message="Dagrun state changed", dagrun=self, session=session
)
)
if self._state != state:
if state == DagRunState.QUEUED:
self.queued_at = timezone.utcnow()
Expand Down Expand Up @@ -769,7 +787,7 @@ def recalculate(self) -> _UnfinishedStates:
# if all tasks finished and at least one failed, the run failed
if not unfinished.tis and any(x.state in State.failed_states for x in tis_for_dagrun_state):
self.log.error("Marking run %s failed", self)
self.set_state(DagRunState.FAILED)
self.set_state(DagRunState.FAILED, session=session)
self.notify_dagrun_state_changed(msg="task_failure")

if execute_callbacks:
Expand All @@ -790,7 +808,7 @@ def recalculate(self) -> _UnfinishedStates:
# if all leaves succeeded and no unfinished tasks, the run succeeded
elif not unfinished.tis and all(x.state in State.success_states for x in tis_for_dagrun_state):
self.log.info("Marking run %s successful", self)
self.set_state(DagRunState.SUCCESS)
self.set_state(DagRunState.SUCCESS, session=session)
self.notify_dagrun_state_changed(msg="success")

if execute_callbacks:
Expand All @@ -811,7 +829,7 @@ def recalculate(self) -> _UnfinishedStates:
# if *all tasks* are deadlocked, the run failed
elif unfinished.should_schedule and not are_runnable_tasks:
self.log.error("Task deadlock (no runnable tasks); marking run %s failed", self)
self.set_state(DagRunState.FAILED)
self.set_state(DagRunState.FAILED, session=session)
self.notify_dagrun_state_changed(msg="all_tasks_deadlocked")

if execute_callbacks:
Expand All @@ -831,7 +849,7 @@ def recalculate(self) -> _UnfinishedStates:

# finally, if the leaves aren't done, the dag is still running
else:
self.set_state(DagRunState.RUNNING)
self.set_state(DagRunState.RUNNING, session=session)

if self._state == DagRunState.FAILED or self._state == DagRunState.SUCCESS:
msg = (
Expand Down
60 changes: 60 additions & 0 deletions airflow/models/state.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations

from sqlalchemy import Column, DateTime, ForeignKey, Integer, String
from sqlalchemy.orm import relationship

from airflow.models import Base
from airflow.models.base import StringID
from airflow.utils import timezone
from airflow.utils.session import provide_session


class DagRunStateModel(Base):
    """A class to represent the state of a DagRun.

    Each row in ``dagrun_state`` stores a state ``type`` and ``name``, an
    optional free-text ``message``, the ``timestamp`` at which the record was
    created, and a foreign key to the owning ``dag_run`` row.
    """

    __tablename__ = "dagrun_state"

    id = Column(Integer, primary_key=True)
    type = Column(StringID())
    name = Column(String(20))
    # Pass the callable itself so the default is computed per insert; calling
    # it here would freeze the timestamp at module import time.
    timestamp = Column(DateTime, default=timezone.utcnow)
    message = Column(StringID())
    dag_run_id = Column(Integer, ForeignKey("dag_run.id"))
    dag_run = relationship("DagRun", back_populates="dag_run_states")

    def __init__(self, type, name, message, timestamp=None, dagrun=None):
        """Build a state record.

        :param type: value stored in the ``type`` column.
        :param name: value stored in the ``name`` column.
        :param message: value stored in the ``message`` column.
        :param timestamp: time of the record; defaults to the current UTC time
            at the moment of construction.
        :param dagrun: the DagRun this record belongs to, if any.
        """
        self.type = type
        self.name = name
        self.message = message
        # Resolve the default at call time: a ``timezone.utcnow()`` default in
        # the signature is evaluated only once, when the function is defined.
        self.timestamp = timezone.utcnow() if timestamp is None else timestamp
        self.dag_run = dagrun

    @staticmethod
    @provide_session
    def add_state(type, name, message="", dagrun=None, session=None):
        """Return the state record for ``dagrun`` with this type and name.

        If a row with the same ``type``, ``name`` and ``dag_run_id`` is found
        through ``session``, that row is returned; otherwise a new, not yet
        persisted ``DagRunStateModel`` is built and returned.

        :param type: value to match / store in the ``type`` column.
        :param name: value to match / store in the ``name`` column.
        :param message: message stored on a newly built record.
        :param dagrun: the DagRun the state belongs to; may be ``None``.
        :param session: session used for the lookup (the ``provide_session``
            decorator is expected to supply one when it is omitted).
        :return: the existing or newly built ``DagRunStateModel``.
        """
        # Without a DagRun, or with one that has no primary key yet, there is
        # no ``dag_run_id`` to match against, so skip the lookup entirely.
        dag_run_id = getattr(dagrun, "id", None)
        if dag_run_id is not None:
            existing = (
                session.query(DagRunStateModel)
                .filter(
                    DagRunStateModel.type == type,
                    DagRunStateModel.name == name,
                    DagRunStateModel.dag_run_id == dag_run_id,
                )
                .first()
            )
            if existing is not None:
                # Previously this path fell through to ``return state`` with
                # ``state`` unbound and raised UnboundLocalError.
                return existing
        return DagRunStateModel(type=type, name=name, message=message, dagrun=dagrun)
2 changes: 1 addition & 1 deletion docs/apache-airflow/img/airflow_erd.sha256
Original file line number Diff line number Diff line change
@@ -1 +1 @@
8cf665c41c065c9368adf2e96450e8cc111dc0653bfabdee977fd6e4964f5646
eef85152669520dbd128ef2c41b06e1c34063f05f4102cdff3de92ded1ba2219
Loading
Loading