Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog/68319.fixed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Forward minion list events in Syndic cluster mode to enable proper job completion detection
32 changes: 30 additions & 2 deletions doc/topics/topology/syndic.rst
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,35 @@ are below their Syndics, the CLI requires a short wait time in order to allow
the Syndics to gather responses from their Minions. This value is defined in
the :conf_master:`syndic_wait` config option and has a default of five seconds.

Syndic config options
=====================
Syndic Modes
============

The Syndic can operate in two different modes, controlled by the ``syndic_mode`` configuration option:

sync
----
This is the default mode. In this mode, the Syndic synchronizes all events, including job publish and return events, between the
local master and higher level masters. This provides complete visibility of all events across all masters.

cluster
-------
In cluster mode, the Syndic only synchronizes job publish and return events between the local master and higher level masters.
This mode is more efficient in terms of the number of events propagated to higher level masters, but provides less visibility since
not all events are propagated.

.. important::

When using ``cluster`` mode, you **must** set a unique ``master_id`` in the configuration of both:

- The syndic master (the lower-level masters)
- The higher-level master(s) (the master of masters)

Choose the mode based on your needs:

- Use ``sync`` mode when you need complete event visibility across your entire Salt infrastructure
- Use ``cluster`` mode when you want to optimize for performance and only need job-related information synchronized

Syndic Config Options
=====================

These are the options that can be used to configure a Syndic node. Note that
other than ``id``, Syndic config options are placed in the Master config on the
Expand All @@ -223,6 +250,7 @@ Syndic node.
- :conf_master:`syndic_log_file`: path to the logfile (absolute or not)
- :conf_master:`syndic_pidfile`: path to the pidfile (absolute or not)
- :conf_master:`syndic_wait`: time in seconds to wait on returns from this syndic
- :conf_master:`syndic_mode`: The mode in which the syndic operates. Can be either ``sync`` (default) or ``cluster``

Minion Data Cache
=================
Expand Down
26 changes: 20 additions & 6 deletions salt/minion.py
Original file line number Diff line number Diff line change
Expand Up @@ -3832,21 +3832,27 @@ def _process_event(self, raw):
# TODO: cleanup: Move down into event class
mtag, data = self.local.event.unpack(raw)
log.trace("Got event %s", mtag) # pylint: disable=no-member
job_event = False
return_event = True

tag_parts = mtag.split("/")
if (
len(tag_parts) >= 4
and tag_parts[1] == "job"
and salt.utils.jid.is_jid(tag_parts[2])
and tag_parts[3] == "ret"
and "return" in data
):
job_event = True

if self.syndic_mode == "cluster" and data.get("master_id", 0) == self.opts.get(
"master_id", 1
):
return_event = False

if job_event and tag_parts[3] == "ret" and "return" in data:
if "jid" not in data:
# Not a job return
return
if self.syndic_mode == "cluster" and data.get(
"master_id", 0
) == self.opts.get("master_id", 1):
if not return_event:
log.debug("Return received with matching master_id, not forwarding")
return

Expand Down Expand Up @@ -3882,7 +3888,15 @@ def _process_event(self, raw):
# TODO: config to forward these? If so we'll have to keep track of who
# has seen them
# if we are the top level masters-- don't forward all the minion events
if self.syndic_mode == "sync":

if (
self.syndic_mode == "sync"
# Even in cluster mode we need to forward the raw event with the minions
# list to determine which minions we expect to return on the master of masters.
or (
return_event and (salt.utils.jid.is_jid(mtag) and "minions" in data)
)
):
# Add generic event aggregation here
if "retcode" not in data:
self.raw_events.append({"data": data, "tag": mtag})
Expand Down
106 changes: 106 additions & 0 deletions tests/pytests/scenarios/syndic/cluster/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import logging

import pytest

from tests.conftest import FIPS_TESTRUN

log = logging.getLogger(__name__)


@pytest.fixture(scope="package")
def master(request, salt_factories):
    """Start the top-level master (the master of masters) for this package."""
    defaults = dict(transport=request.config.getoption("--transport"))
    # FIPS builds require the SHA-224 signing algorithm; otherwise use SHA-1.
    signing = "PKCS1v15-SHA224" if FIPS_TESTRUN else "PKCS1v15-SHA1"
    overrides = dict(
        interface="127.0.0.1",
        auto_accept=True,
        gather_job_timeout=60,
        timeout=60,
        order_masters=True,
        master_id="master",
        fips_mode=FIPS_TESTRUN,
        publish_signing_algorithm=signing,
    )
    daemon = salt_factories.salt_master_daemon(
        "master",
        defaults=defaults,
        overrides=overrides,
        extra_cli_arguments_after_first_start_failure=["--log-level=info"],
    )
    with daemon.started(start_timeout=180):
        yield daemon


@pytest.fixture(scope="package")
def salt_cli(master):
    """Return a ``salt`` CLI wrapper bound to the master of masters."""
    cli = master.salt_cli(timeout=180)
    return cli


@pytest.fixture(scope="package")
def syndic(master, salt_factories):
    """Start a syndic whose local master runs with ``syndic_mode: cluster``."""
    upstream_ret = master.config["ret_port"]
    upstream_pub = master.config["publish_port"]
    upstream_addr = master.config["interface"]

    # Both masters must share the same publish port; this is a drawback of
    # the current syndic design.
    defaults = dict(
        transport=master.config["transport"],
        interface="127.0.0.2",
        publish_port=str(upstream_pub),
        syndic_mode="cluster",
    )
    # FIPS builds require the SHA-224 signing algorithm; otherwise use SHA-1.
    signing = "PKCS1v15-SHA224" if FIPS_TESTRUN else "PKCS1v15-SHA1"
    master_overrides = dict(
        interface="127.0.0.2",
        auto_accept=True,
        syndic_master=str(upstream_addr),
        syndic_master_port=str(upstream_ret),
        master_id="syndic",
        fips_mode=FIPS_TESTRUN,
        publish_signing_algorithm=signing,
    )
    minion_overrides = dict(
        master="127.0.0.2",
        publish_port=str(upstream_pub),
        fips_mode=FIPS_TESTRUN,
        encryption_algorithm="OAEP-SHA224" if FIPS_TESTRUN else "OAEP-SHA1",
        signing_algorithm=signing,
    )
    daemon = master.salt_syndic_daemon(
        "syndic",
        defaults=defaults,
        master_overrides=master_overrides,
        minion_overrides=minion_overrides,
        extra_cli_arguments_after_first_start_failure=["--log-level=info"],
    )
    with daemon.started(start_timeout=180):
        yield daemon


@pytest.fixture(scope="package")
def minion(syndic, salt_factories):
    """Start a minion attached to the syndic's local master."""
    local_master = syndic.master
    defaults = dict(transport=syndic.config["transport"])
    overrides = dict(
        master="{}:{}".format(
            local_master.config["interface"], local_master.config["ret_port"]
        ),
        fips_mode=FIPS_TESTRUN,
        encryption_algorithm="OAEP-SHA224" if FIPS_TESTRUN else "OAEP-SHA1",
        signing_algorithm="PKCS1v15-SHA224" if FIPS_TESTRUN else "PKCS1v15-SHA1",
    )
    daemon = local_master.salt_minion_daemon(
        "minion",
        defaults=defaults,
        overrides=overrides,
        extra_cli_arguments_after_first_start_failure=["--log-level=info"],
    )
    with daemon.started(start_timeout=180):
        yield daemon
19 changes: 19 additions & 0 deletions tests/pytests/scenarios/syndic/cluster/test_syndic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import json
import time


def test_syndic(salt_cli, minion):
    """Both the syndic's local minion and the standalone minion answer a ping."""
    result = salt_cli.run("test.ping", minion_tgt="*", _timeout=15)
    expected = {"syndic": True, "minion": True}
    assert result.data == expected


def test_syndic_target_single_minion(salt_cli, minion):
    """
    Test that the salt CLI exits after getting all returns + syndic_wait time
    rather than waiting for the full gather_job_timeout set high in conftest.
    """
    ret = salt_cli.run("test.ping", minion_tgt="minion", _timeout=15)
    # ``is True`` instead of ``== True``: we want the actual boolean return,
    # not merely a truthy value (PEP 8 / flake8 E712).
    assert ret.data is True
Empty file.
Loading