Skip to content

TestTestgresCommon::test_restart[local2] is unstable #323

@dmitry-lipetsk

Description

@dmitry-lipetsk

https://github.com/postgrespro/testgres/actions/runs/21259348791/job/61182602347

___________________ TestTestgresCommon.test_restart[local2] ____________________
[gw3] linux -- Python 3.12.12 /tmp/testgres_venv/bin/python

self = PostgresNode(name='testgres-b7f9893f-d3f0-47de-ad1b-00b41a81123d', port=None, base_dir='/tmp/tgsn_33ib9nn4')
params = []

    def restart(self, params=[]):
        """
        Restart this node using pg_ctl.
    
        Args:
            params: additional arguments for pg_ctl.
    
        Returns:
            This instance of :class:`.PostgresNode`.
        """
    
        _params = [
            self._get_bin_path("pg_ctl"),
            "-D", self.data_dir,
            "-l", self.pg_log_file,
            "-w",  # wait
            "restart"
        ] + params  # yapf: disable
    
        try:
>           error_code, out, error = execute_utility2(self.os_ops, _params, self.utils_log_file, verbose=True)
                                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

_params    = ['/usr/local/bin/pg_ctl', '-D', '/tmp/tgsn_33ib9nn4/data', '-l', '/tmp/tgsn_33ib9nn4/logs/postgresql.log', '-w', ...]
files      = [('/tmp/tgsn_33ib9nn4/data/postgresql.conf', b"\nfsync = off\nmax_worker_processes = 10\nlog_statement = 'all'\nlisten...TAL:  could not create any TCP/IP sockets\n2026-01-22 18:23:43.372 GMT [173252] LOG:  database system is shut down\n')]
msg        = 'Cannot restart node'
params     = []
self       = PostgresNode(name='testgres-b7f9893f-d3f0-47de-ad1b-00b41a81123d', port=None, base_dir='/tmp/tgsn_33ib9nn4')

src/node.py:1208: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
src/utils.py:101: in execute_utility2
    exit_status, out, error = os_ops.exec_command(
        args       = ['/usr/local/bin/pg_ctl', '-D', '/tmp/tgsn_33ib9nn4/data', '-l', '/tmp/tgsn_33ib9nn4/logs/postgresql.log', '-w', ...]
        exec_env   = None
        ignore_errors = False
        logfile    = '/tmp/tgsn_33ib9nn4/logs/utils.log'
        os_ops     = <testgres.operations.local_ops.LocalOperations object at 0x7ffa8106e9c0>
        verbose    = True
/tmp/testgres_venv/lib/python3.12/site-packages/testgres/operations/local_ops.py:266: in exec_command
    RaiseError.UtilityExitedWithNonZeroCode(
        cmd        = ['/usr/local/bin/pg_ctl', '-D', '/tmp/tgsn_33ib9nn4/data', '-l', '/tmp/tgsn_33ib9nn4/logs/postgresql.log', '-w', ...]
        cwd        = None
        encoding   = 'UTF-8'
        error      = 'pg_ctl: could not start server\nExamine the log output.\n'
        exec_env   = None
        expect_error = False
        get_process = False
        ignore_errors = False
        input      = None
        output     = 'waiting for server to shut down.... done\nserver stopped\nwaiting for server to start.... stopped waiting\n'
        process    = <Popen: returncode: 1 args: ['/usr/local/bin/pg_ctl', '-D', '/tmp/tgsn_33ib9...>
        self       = <testgres.operations.local_ops.LocalOperations object at 0x7ffa8106e9c0>
        shell      = False
        stderr     = None
        stdin      = None
        stdout     = None
        text       = False
        timeout    = None
        verbose    = True
        wait_exit  = False
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

cmd = ['/usr/local/bin/pg_ctl', '-D', '/tmp/tgsn_33ib9nn4/data', '-l', '/tmp/tgsn_33ib9nn4/logs/postgresql.log', '-w', ...]
exit_code = 1
msg_arg = 'pg_ctl: could not start server\nExamine the log output.\n'
error = 'pg_ctl: could not start server\nExamine the log output.\n'
out = 'waiting for server to shut down.... done\nserver stopped\nwaiting for server to start.... stopped waiting\n'

    @staticmethod
    def UtilityExitedWithNonZeroCode(cmd, exit_code, msg_arg, error, out):
        assert type(exit_code) == int  # noqa: E721
    
        msg_arg_s = __class__._TranslateDataIntoString(msg_arg)
        assert type(msg_arg_s) == str  # noqa: E721
    
        msg_arg_s = msg_arg_s.strip()
        if msg_arg_s == "":
            msg_arg_s = "#no_error_message"
    
        message = "Utility exited with non-zero code (" + str(exit_code) + "). Error: `" + msg_arg_s + "`"
>       raise ExecUtilException(
            message=message,
            command=cmd,
            exit_code=exit_code,
            out=out,
            error=error)
E       testgres.operations.exceptions.ExecUtilException: Utility exited with non-zero code (1). Error: `pg_ctl: could not start server
E       Examine the log output.`
E       Command: /usr/local/bin/pg_ctl -D /tmp/tgsn_33ib9nn4/data -l /tmp/tgsn_33ib9nn4/logs/postgresql.log -w restart
E       Exit code: 1
E       ---- Error:
E       pg_ctl: could not start server
E       Examine the log output.
E       
E       ---- Out:
E       waiting for server to shut down.... done
E       server stopped
E       waiting for server to start.... stopped waiting

__class__  = <class 'testgres.operations.raise_error.RaiseError'>
cmd        = ['/usr/local/bin/pg_ctl', '-D', '/tmp/tgsn_33ib9nn4/data', '-l', '/tmp/tgsn_33ib9nn4/logs/postgresql.log', '-w', ...]
error      = 'pg_ctl: could not start server\nExamine the log output.\n'
exit_code  = 1
message    = 'Utility exited with non-zero code (1). Error: `pg_ctl: could not start server\nExamine the log output.`'
msg_arg    = 'pg_ctl: could not start server\nExamine the log output.\n'
msg_arg_s  = 'pg_ctl: could not start server\nExamine the log output.'
out        = 'waiting for server to shut down.... done\nserver stopped\nwaiting for server to start.... stopped waiting\n'

/tmp/testgres_venv/lib/python3.12/site-packages/testgres/operations/raise_error.py:18: ExecUtilException

The above exception was the direct cause of the following exception:

self = <tests.test_testgres_common.TestTestgresCommon object at 0x7ffa7e1ce330>
node_svc = <tests.helpers.global_data.PostgresNodeService object at 0x7ffa7eb55c70>

    def test_restart(self, node_svc: PostgresNodeService):
        assert isinstance(node_svc, PostgresNodeService)
    
        with __class__.helper__get_node(node_svc) as node:
            node.init().start()
    
            # restart, ok
            res = node.execute('select 1')
            assert (res == [(1,)])
>           node.restart()

__class__  = <class 'tests.test_testgres_common.TestTestgresCommon'>
node       = PostgresNode(name='testgres-b7f9893f-d3f0-47de-ad1b-00b41a81123d', port=None, base_dir='/tmp/tgsn_33ib9nn4')
node_svc   = <tests.helpers.global_data.PostgresNodeService object at 0x7ffa7eb55c70>
res        = [(1,)]
self       = <tests.test_testgres_common.TestTestgresCommon object at 0x7ffa7e1ce330>

tests/test_testgres_common.py:324: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
src/node.py:1214: in restart
    raise_from(StartNodeException(msg, files), e)
        _params    = ['/usr/local/bin/pg_ctl', '-D', '/tmp/tgsn_33ib9nn4/data', '-l', '/tmp/tgsn_33ib9nn4/logs/postgresql.log', '-w', ...]
        files      = [('/tmp/tgsn_33ib9nn4/data/postgresql.conf', b"\nfsync = off\nmax_worker_processes = 10\nlog_statement = 'all'\nlisten...TAL:  could not create any TCP/IP sockets\n2026-01-22 18:23:43.372 GMT [173252] LOG:  database system is shut down\n')]
        msg        = 'Cannot restart node'
        params     = []
        self       = PostgresNode(name='testgres-b7f9893f-d3f0-47de-ad1b-00b41a81123d', port=None, base_dir='/tmp/tgsn_33ib9nn4')
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

value = None
from_value = ExecUtilException(message='Utility exited with non-zero code (1). Error: `pg_ctl: could not start server\nExamine the ...nwaiting for server to start.... stopped waiting\n', error='pg_ctl: could not start server\nExamine the log output.\n')

>   ???
E   src.exceptions.StartNodeException: Cannot restart node
E   /tmp/tgsn_33ib9nn4/data/postgresql.conf
E   ----
E   b"\nfsync = off\nmax_worker_processes = 10\nlog_statement = 'all'\nlisten_addresses = '127.0.0.1'\nport = 48670\n\nmax_replication_slots = 10\nmax_wal_senders = 10\n\nhot_standby = on\nwal_keep_size = 320\nwal_level = 'replica'\n"
E   
E   /tmp/tgsn_33ib9nn4/data/postgresql.auto.conf
E   ----
E   b'# Do not edit this file manually!\n# It will be overwritten by the ALTER SYSTEM command.\n'
E   
E   /tmp/tgsn_33ib9nn4/data/pg_hba.conf
E   ----
E   b'local   all             all                                     trust\nhost    all             all             127.0.0.1/32            trust\nhost    all             all             ::1/128                 trust\nlocal   replication     all                                     trust\nhost    replication     all             127.0.0.1/32            trust\nhost    replication     all             ::1/128                 trust\nlocal\treplication\tall\t\t\ttrust\nhost\treplication\tall\t127.0.0.1/32\ttrust\nhost\treplication\tall\t::1/128\t\ttrust\nhost\treplication\tall\t127.0.0.0/24\t\ttrust\nhost\tall\tall\t127.0.0.0/24\t\ttrust\nhost\tall\tall\tall\ttrust\nhost\treplication\tall\tall\ttrust\n'
E   
E   /tmp/tgsn_33ib9nn4/logs/postgresql.log
E   ----
E   b'2026-01-22 18:23:43.153 GMT [173201] LOG:  starting PostgreSQL 18.1 on x86_64-pc-linux-musl, compiled by gcc (Alpine 15.2.0) 15.2.0, 64-bit\n2026-01-22 18:23:43.153 GMT [173201] LOG:  listening on IPv4 address "127.0.0.1", port 48670\n2026-01-22 18:23:43.153 GMT [173201] LOG:  listening on Unix socket "/var/run/postgresql/.s.PGSQL.48670"\n2026-01-22 18:23:43.156 GMT [173208] LOG:  database system was shut down at 2026-01-22 18:23:43 GMT\n2026-01-22 18:23:43.159 GMT [173201] LOG:  database system is ready to accept connections\n2026-01-22 18:23:43.247 GMT [173229] LOG:  statement: select 1\n2026-01-22 18:23:43.248 GMT [173201] LOG:  received fast shutdown request\n2026-01-22 18:23:43.248 GMT [173201] LOG:  aborting any active transactions\n2026-01-22 18:23:43.251 GMT [173201] LOG:  background worker "logical replication launcher" (PID 173211) exited with exit code 1\n2026-01-22 18:23:43.252 GMT [173206] LOG:  shutting down\n2026-01-22 18:23:43.252 GMT [173206] LOG:  checkpoint starting: shutdown immediate\n2026-01-22 18:23:43.253 GMT [173206] LOG:  checkpoint complete: wrote 0 buffers (0.0%), wrote 3 SLRU buffers; 0 WAL file(s) added, 0 removed, 0 recycled; write=0.001 s, sync=0.001 s, total=0.001 s; sync files=0, longest=0.000 s, average=0.000 s; distance=0 kB, estimate=0 kB; lsn=0/175F068, redo lsn=0/175F068\n2026-01-22 18:23:43.262 GMT [173201] LOG:  database system is shut down\n2026-01-22 18:23:43.369 GMT [173252] LOG:  starting PostgreSQL 18.1 on x86_64-pc-linux-musl, compiled by gcc (Alpine 15.2.0) 15.2.0, 64-bit\n2026-01-22 18:23:43.369 GMT [173252] LOG:  could not bind IPv4 address "127.0.0.1": Address in use\n2026-01-22 18:23:43.369 GMT [173252] HINT:  Is another postmaster already running on port 48670? If not, wait a few seconds and retry.\n2026-01-22 18:23:43.369 GMT [173252] WARNING:  could not create listen socket for "127.0.0.1"\n2026-01-22 18:23:43.369 GMT [173252] FATAL:  could not create any TCP/IP sockets\n2026-01-22 18:23:43.372 GMT [173252] LOG:  database system is shut down\n'

from_value = ExecUtilException(message='Utility exited with non-zero code (1). Error: `pg_ctl: could not start server\nExamine the ...nwaiting for server to start.... stopped waiting\n', error='pg_ctl: could not start server\nExamine the log output.\n')
value      = None

<string>:3: StartNodeException
------------------------------ Captured log call -------------------------------
DEBUG    root:port_manager__generic.py:97 [port manager] Port 48670 is reserved.
DEBUG    root:port_manager__generic.py:97 [port manager] Port 48670 is released.

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions