From 4d1b1e7c41d04adad4c93d4a01c43551e100c0b1 Mon Sep 17 00:00:00 2001
From: Yin Congmin
Date: Tue, 3 Jan 2023 16:51:04 +0800
Subject: [PATCH 01/10] control/discovery: add discovery controller

The discovery controller implements the basic functionality. Use the
command "python3 -m control.discovery" to start the discovery
controller. Clients can use the command "nvme discover -t tcp -a ip
-s port" to get log pages. The configuration is in the [discovery]
section of ceph-nvmeof.conf.

feature: https://github.com/ceph/ceph-nvmeof/issues/108

Signed-off-by: Yin Congmin
---
 ceph-nvmeof.conf     |    6 +
 control/discovery.py | 1077 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 1083 insertions(+)
 create mode 100644 control/discovery.py

diff --git a/ceph-nvmeof.conf b/ceph-nvmeof.conf
index 078e3b9b..c895d6fc 100644
--- a/ceph-nvmeof.conf
+++ b/ceph-nvmeof.conf
@@ -19,6 +19,12 @@ state_update_interval_sec = 5
 #max_controller_id = 65519
 enable_discovery_controller = false
 
+[discovery]
+
+addr = 0.0.0.0
+port = 8009
+debug = 20
+
 [ceph]
 pool = rbd
 config_file = /etc/ceph/ceph.conf
diff --git a/control/discovery.py b/control/discovery.py
new file mode 100644
index 00000000..ff875720
--- /dev/null
+++ b/control/discovery.py
@@ -0,0 +1,1077 @@
+#
+# Copyright (c) 2021 International Business Machines
+# All rights reserved.
+#
+# SPDX-License-Identifier: LGPL-3.0-or-later
+#
+# Authors: congmin.yin@intel.com
+#
+
+import argparse
+import grpc
+import json
+import logging
+from .config import GatewayConfig
+from .state import GatewayState, LocalGatewayState, OmapGatewayState, GatewayStateHandler
+
+import rados
+from typing import Dict, Optional
+
+import socket
+import threading
+import time
+import enum
+import uuid
+import struct
+import selectors
+from dataclasses import dataclass, field
+from ctypes import Structure, LittleEndianStructure, c_bool, c_ubyte, c_uint8, c_uint16, c_uint32, c_uint64, c_float
+
+# NVMe tcp pdu type
+class NVME_TCP_PDU(enum.IntFlag):
+    ICREQ = 0x0
+    ICRESP = 0x1
+    H2C_TERM = 0x2
+    C2H_TERM = 0x3
+    CMD = 0x4
+    RSP = 0x5
+    H2C_DATA = 0x6
+    C2H_DATA = 0x7
+    TCP_R2T = 0x9
+
+# NVMe tcp opcode
+class NVME_TCP_OPC(enum.IntFlag):
+    DELETE_SQ = 0x0
+    CREATE_SQ = 0x1
+    GET_LOG_PAGE = 0x2
+    DELETE_CQ = 0x4
+    CREATE_CQ = 0x5
+    IDENTIFY = 0x6
+    ABORT = 0x8
+    SET_FEATURES = 0x9
+    GET_FEATURES = 0xa
+    ASYNC_EVE_REQ = 0xc
+    NS_MGMT = 0xd
+    FW_COMMIT = 0x10
+    FW_IMG_DOWNLOAD = 0x11
+    NS_ATTACH = 0x15
+    KEEP_ALIVE = 0x18
+    FABRIC_TYPE = 0x7F
+
+# NVMe tcp fabric command type
+class NVME_TCP_FCTYPE(enum.IntFlag):
+    PROP_SET = 0x0
+    CONNECT = 0x1
+    PROP_GET = 0x4
+    AUTH_SEND = 0x5
+    AUTH_RECV = 0x6
+    DISCONNECT = 0x8
+
+# NVMe controller register space offsets
+class NVME_CTL(enum.IntFlag):
+    CAPABILITIES = 0x0
+    VERSION = 0x08
+    CONFIGURATION = 0x14
+    STATUS = 0x1c
+
+
+# NVM subsystem types
+class NVMF_SUBTYPE(enum.IntFlag):
+    # Discovery type for NVM subsystem
+    DISCOVERY = 0x1
+    # NVMe type for NVM subsystem
+    NVME = 0x2
+
+# NVMe over Fabrics transport types
+class TRANSPORT_TYPES(enum.IntFlag):
+    RDMA = 0x1
+    FC = 0x2
+    TCP = 0x3
+    INTRA_HOST = 0xfe
+
+# Address family types
+class ADRFAM_TYPES(enum.IntFlag):
+    ipv4 = 0x1
+    ipv6 = 0x2
+    ib = 0x3
+    fc = 0x4
+    intra_host = 0xfe
+
+# Transport requirement, secure channel requirements
+# Connections shall be made over a fabric secure channel
+class NVMF_TREQ_SECURE_CHANNEL(enum.IntFlag):
+    NOT_SPECIFIED = 0x0
+    REQUIRED = 0x1
+    NOT_REQUIRED = 0x2
+
+# maximum number of connections
+MAX_CONNECTION = 10240
+
+# NVMe tcp package length, refer: MTU = 1500 bytes
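+# For reference: one discovery log record (DiscoveryLogEntry below) is
+# also 1024 bytes, so a 1024-byte unit plus the 24-byte PDU headers and
+# TCP/IP overhead still fits within a single 1500-byte MTU frame.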
+NVME_TCP_PDU_UNIT = 1024 + +# Max SQ head pointer +SQ_HEAD_MAX = 128 + +@dataclass +class Connection: + """Data used multiple times in each connection.""" + + connection: socket.socket = None + allow_listeners: list = field(default_factory=list) + log_page: bytearray = field(default_factory=bytearray) + recv_buffer: bytearray = field(default_factory=bytearray) + nvmeof_connect_data_hostid: str = field(default_factory=str) + nvmeof_connect_data_cntlid: int = 0 + nvmeof_connect_data_subnqn: str = field(default_factory=str) + nvmeof_connect_data_hostnqn: str = field(default_factory=str) + sq_head_ptr: int = 0 + unsent_log_page_len: int = 0 + property_data: str = field(default_factory=str) + property_set: bool = False + shutdown_now: bool = False + controller_id: uuid = None + gen_cnt: int = 0 + recv_async: bool = False + async_cmd_id: int = 0 + keep_alive_time: float = 0.0 + keep_alive_timeout: int = 0 + +class AutoSerializableStructure(LittleEndianStructure): + def __add__(self, other): + if isinstance(other, LittleEndianStructure): + return bytes(self) + bytes(other) + elif isinstance(other, bytes): + return bytes(self) + other + else: + raise ValueError("error message format.") + +class Pdu(AutoSerializableStructure): + _fields_ = [ + ("type", c_uint8), + ("specical_flag", c_uint8), + ("header_length", c_uint8), + ("data_offset", c_uint8), + ("packet_length", c_uint32), + ] + +class ICResp(AutoSerializableStructure): + _fields_ = [ + # pdu version format + ("version_format", c_uint16), + # controller Pdu data alignment + ("data_alignment", c_uint8), + # digest types enabled + ("digest_types", c_uint8), + # Maximum data capsules per r2t supported + ("maximum_data_capsules", c_uint32) + ] + +class CqeConnect(AutoSerializableStructure): + _fields_ = [ + ("controller_id", c_uint16), + ("authentication", c_uint16), + ("reserved", c_uint32), + ("sq_head_ptr", c_uint16), + ("sq_id", c_uint16), + ("cmd_id", c_uint16), + ("status", c_uint16) + ] + +class CqePropertyGetSet(AutoSerializableStructure): + _fields_ = [ + # property data for property get, reserved for property set + ("property_data", c_ubyte * 8), + ("sq_head_ptr", c_uint16), + ("sq_id", c_uint16), + ("cmd_id", c_uint16), + ("status", c_uint16) + ] + +class NVMeTcpDataPdu(AutoSerializableStructure): + _fields_ = [ + ("cmd_id", c_uint16), + ("transfer_tag", c_uint16), + ("data_offset", c_uint32), + ("data_length", c_uint32), + ("reserved", c_uint32) + ] + +class NVMeIdentify(AutoSerializableStructure): + _fields_ = [ + # skip some fields, include VID, SSVID, SN, MN + ("todo_fields1", c_ubyte * 64), + ("firmware_revision", c_ubyte * 8), + # RAB, IEEE, CMIC + ("todo_fields2", c_ubyte * 5), + # maximum data transfer size + ("mdts", c_uint8), + ("controller_id", c_uint16), + ("version", c_uint8 * 4), + # RTD3R, RTD3E + ("todo_fields3", c_ubyte * 8), + # optional asynchronous events supported + ("oaes", c_ubyte * 4), + # CTRATT, RRLS, CNTRLTYPE, FGUID, NVMe Management Interface, OACS, ACL + ("todo_fields4", c_ubyte * 163), + # asynchronous events request limit + ("aerl", c_uint8), + ("firmware_updates", c_uint8), + # log page attributes + ("lpa", c_uint8), + # error log page entries(ELPE) + ("elpe", c_uint8), + # NPSS, AVSCC, APSTA, WCTEMP, CCTEMP, MTFA, HMPRE, HMIN, TNVMCAP... + # TODO: keep alive support - timer value(KAS)? 
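+        # Note: per NVM Express 1.4, the Keep Alive Support (KAS) value is
+        # the 16-bit field at bytes 321:320 of the Identify Controller data,
+        # so it falls inside the todo_fields5 block below.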
+ ("todo_fields5", c_ubyte * 251), + # maximum outstanding commands + ("max_cmd", c_uint16), + # number of namespace, optional NVM command support + ("todo_fields6", c_uint8 * 6), + # fused operation support + ("fused_operation", c_uint16), + # FNA, VWC, AWUN, AWUPF, NVSCC, NWPC + ("todo_fields7", c_uint8 * 8), + # atomic compare & write unit + ("acwu", c_uint16), + ("reserved1", c_uint16), + # SGL support + ("sgls", c_uint8 * 4), + # maxinum number of allowed namespaces + ("mnan", c_uint32), + ("reserved2", c_ubyte * 224), + ("subnqn", c_ubyte * 256), + ("reserved3", c_ubyte * 768), + ("nvmeof_attributes", c_ubyte * 256), + ("power_state_attributes", c_ubyte * 1024), + ("vendor_specific", c_ubyte * 1024) + ] + +# for set feature, keep alive and async +class CqeNVMe(AutoSerializableStructure): + _fields_ = [ + ("dword0", c_uint32), + ("dword1", c_uint32), + ("sq_head_ptr", c_uint16), + ("sq_id", c_uint16), + ("cmd_id", c_uint16), + ("status", c_uint16) + ] + +class NVMeGetLogPage(AutoSerializableStructure): + _fields_ = [ + # generation counter + ("genctr", c_uint64), + # number of records + ("numrec", c_uint64), + #record format + ("recfmt", c_uint16), + ("reserved", c_ubyte * 1006) + ] + +class DiscoveryLogEntry(AutoSerializableStructure): + _fields_ = [ + ("trtype", c_uint8), + ("adrfam", c_uint8), + ("subtype", c_uint8), + ("treq", c_uint8), + ("port_id", c_uint16), + ("controller_id", c_uint16), + # admin max SQ size + ("asqsz", c_uint16), + ("reserved1", c_ubyte * 22), + ("trsvcid", c_ubyte * 32), + ("reserved2", c_ubyte * 192), + ("subnqn", c_ubyte * 256), + ("traddr", c_ubyte * 256), + # Transport specific address subtype + ("tsas", c_ubyte * 256) + ] + +class DiscoveryService: + """Implements discovery controller. + + Response discover request from initiator. 
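+
+    The controller accepts NVMe/TCP connections from initiators and answers
+    the ICReq, Fabrics Connect, Property Get/Set, Identify, Get Log Page,
+    Keep Alive and Asynchronous Event commands needed for discovery, building
+    the discovery log page from the listener, host and subsystem records
+    stored in the OMAP object.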
+ + Instance attributes: + version: Discovery controller version + config: Basic gateway parameters + logger: Logger instance to track discovery controller events + omap_name: OMAP object name + ioctx: I/O context which allows OMAP access + discovery_addr: Discovery controller addr which allows initiator send command + discovery_port: Discovery controller's listening port + """ + + BDEV_PREFIX = "bdev_" + NAMESPACE_PREFIX = "namespace_" + SUBSYSTEM_PREFIX = "subsystem_" + HOST_PREFIX = "host_" + LISTENER_PREFIX = "listener_" + + def __init__(self, config): + self.version = 1 + self.config = config + self.lock = threading.Lock() + + self.logger = logging.getLogger(__name__) + log_level = self.config.get("discovery", "debug") + self.logger.setLevel(level=int(log_level)) + + gateway_group = self.config.get("gateway", "group") + self.omap_name = f"nvmeof.{gateway_group}.state" \ + if gateway_group else "nvmeof.state" + self.logger.info(f"log pages info from omap: {self.omap_name}") + + ceph_pool = self.config.get("ceph", "pool") + ceph_conf = self.config.get("ceph", "config_file") + conn = rados.Rados(conffile=ceph_conf) + conn.connect() + self.ioctx = conn.open_ioctx(ceph_pool) + + self.discovery_addr = self.config.get("discovery", "addr") + self.discovery_port = self.config.get("discovery", "port") + if self.discovery_addr == '' or self.discovery_port == '': + self.logger.error("discovery addr/port are empty.") + assert 0 + self.logger.info(f"discovery addr: {self.discovery_addr} port: {self.discovery_port}") + + self.conn_vals = {} + self.connection_counter = 1 + self.selector = selectors.DefaultSelector() + + def _read_all(self) -> Dict[str, str]: + """Reads OMAP and returns dict of all keys and values.""" + + with rados.ReadOpCtx() as read_op: + iter, _ = self.ioctx.get_omap_vals(read_op, "", "", -1) + self.ioctx.operate_read_op(read_op, self.omap_name) + omap_dict = dict(iter) + return omap_dict + + def _get_vals(self, omap_dict, prefix): + """Read values from the OMAP dict.""" + + return [json.loads(val.decode('utf-8')) for (key, val) in omap_dict.items() + if key.startswith(prefix)] + + def reply_initialize(self, conn): + """Reply initialize request.""" + + self.logger.debug("handle ICreq.") + pdu_reply = Pdu() + pdu_reply.type = NVME_TCP_PDU.ICRESP + pdu_reply.header_length = 128 + pdu_reply.packet_length = 128 + + icresp_reply = ICResp() + icresp_reply.maximum_data_capsules = 131072 + + try: + conn.sendall(pdu_reply + icresp_reply + bytes(112)) + except BrokenPipeError: + self.logger.error("client disconnected unexpectedly.") + return -1 + self.logger.debug("reply initialize connection request.") + return 0 + + def reply_fc_cmd_connect(self, conn, data, cmd_id): + """Reply connect request.""" + + self.logger.debug("handle connect request.") + self_conn = self.conn_vals[conn.fileno()] + hf_nvmeof_cmd_connect_rsvd1 = struct.unpack_from('<19B', data, 13) + SIGL1 = struct.unpack_from('> 6) & 0x3 + if shutdown_notification == 0: + if self_conn.property_set is True: + # controller status: ready + property_get.property_data = (c_ubyte * 8)(0x01, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00) + else: + property_get.property_data = (c_ubyte * 8)(0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00) + else: + # here shutdown_notification should be 0x1 + property_get.property_data = (c_ubyte * 8)(0x09, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00) + self_conn.shutdown_now = True + elif NVME_CTL(nvmeof_prop_get_set_offset) == NVME_CTL.VERSION: + # nvme version: 1.3 + property_get.property_data = 
(c_ubyte * 8)(0x00, 0x03, \ + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00) + else: + self.logger.error("unsupported type for property getting.") + property_get.sq_head_ptr = self_conn.sq_head_ptr + property_get.cmd_id = cmd_id + + try: + conn.sendall(pdu_reply + property_get) + except BrokenPipeError: + self.logger.error("client disconnected unexpectedly.") + return -1 + self.logger.debug("reply property get request.") + return 0 + + def reply_fc_cmd_prop_set(self, conn, data, cmd_id): + """Reply property set request.""" + + self.logger.debug("handle property set request.") + self_conn = self.conn_vals[conn.fileno()] + nvmeof_prop_get_set_rsvd0 = struct.unpack_from('<35B', data, 13) + nvmeof_prop_get_set_attrib = struct.unpack_from('<1B', data, 48)[0] + nvmeof_prop_get_set_rsvd1 = struct.unpack_from('<3B', data, 49) + nvmeof_prop_get_set_offset = struct.unpack_from('> 8) & 0x1F + get_logpage_lsi = nvme_get_logpage_dword11 >> 16 + get_logpage_uid_idx = nvme_get_logpage_dword14 & 0x3F + + if get_logpage_lid != 0x70: + self.logger.error("request type error, not discovery request.") + return -1 + + # Filter listeners based on host access permissions + allow_listeners = self_conn.allow_listeners + if len(allow_listeners) == 0: + for host in hosts: + a = host["host_nqn"] + if host["host_nqn"] == '*' or host["host_nqn"] == hostnqn: + for listener in listeners: + # TODO: It is better to change nqn in the "listener" + # to subsystem_nqn to avoid confusion + if host["subsystem_nqn"] == listener["nqn"]: + allow_listeners += [listener,] + self_conn.allow_listeners = allow_listeners + + # Prepare all log page data segments + if self_conn.unsent_log_page_len == 0 and nvme_data_len > 16: + self_conn.unsent_log_page_len = 1024 * (len(allow_listeners) + 1) + self_conn.log_page = bytearray(self_conn.unsent_log_page_len) + + nvme_get_log_page_reply = NVMeGetLogPage() + nvme_get_log_page_reply.genctr = self_conn.gen_cnt + nvme_get_log_page_reply.numrec = len(allow_listeners) + self_conn.log_page[0:1024] = nvme_get_log_page_reply + + # log entries + log_entry_counter = 0 + while log_entry_counter < len(allow_listeners): + log_entry = DiscoveryLogEntry() + trtype = TRANSPORT_TYPES[allow_listeners[log_entry_counter]["trtype"]] + if trtype is None: + self.logger.error("unsupported transport type") + else: + log_entry.trtype = trtype + adrfam = ADRFAM_TYPES[allow_listeners[log_entry_counter]["adrfam"]] + if adrfam is None: + self.logger.error("unsupported adress family") + else: + log_entry.adrfam = adrfam + log_entry.subtype = NVMF_SUBTYPE.NVME + log_entry.treq = NVMF_TREQ_SECURE_CHANNEL.NOT_REQUIRED + log_entry.port_id = log_entry_counter + log_entry.controller_id = 0xffff + log_entry.asqsz = 128 + # transport service indentifier + log_entry.trsvcid = (c_ubyte * 32)(*[c_ubyte(x) for x \ + in allow_listeners[log_entry_counter]["trsvcid"].encode()]) + log_entry.trsvcid[len(allow_listeners[log_entry_counter]["trsvcid"]):] = \ + [c_ubyte(0x20)] * (32 - len(allow_listeners[log_entry_counter]["trsvcid"])) + # NVM subsystem qualified name + log_entry.subnqn = (c_ubyte * 256)(*[c_ubyte(x) for x \ + in allow_listeners[log_entry_counter]["nqn"].encode()]) + log_entry.subnqn[len(allow_listeners[log_entry_counter]["nqn"]):] = \ + [c_ubyte(0x00)] * (256 - len(allow_listeners[log_entry_counter]["nqn"])) + # Transport address + log_entry.traddr = (c_ubyte * 256)(*[c_ubyte(x) for x \ + in allow_listeners[log_entry_counter]["traddr"].encode()]) + log_entry.traddr[len(allow_listeners[log_entry_counter]["traddr"]):] = \ + [c_ubyte(0x20)] * 
(256 - len(allow_listeners[log_entry_counter]["traddr"])) + + self_conn.log_page[1024*(log_entry_counter+1): \ + 1024*(log_entry_counter+2)] = log_entry + log_entry_counter += 1 + else: + self.logger.debug("in the process of sending log pages...") + + reply = b'' + pdu_and_nvme_pdu_len = 8 + 16 + pdu_reply = Pdu() + pdu_reply.type = NVME_TCP_PDU.C2H_DATA + pdu_reply.specical_flag = 0x0c + pdu_reply.header_length = pdu_and_nvme_pdu_len + pdu_reply.data_offset = pdu_and_nvme_pdu_len + pdu_reply.packet_length = pdu_and_nvme_pdu_len + nvme_data_len + + nvme_tcp_data_pdu = NVMeTcpDataPdu() + nvme_tcp_data_pdu.cmd_id = cmd_id + nvme_tcp_data_pdu.data_length = nvme_data_len + + # reply based on the received get log page request packet(length) + if nvme_data_len == 16: + # nvme cli version: 1.x + nvme_get_log_page_reply = NVMeGetLogPage() + nvme_get_log_page_reply.genctr = self_conn.gen_cnt + nvme_get_log_page_reply.numrec = len(listeners) + + reply = pdu_reply + nvme_tcp_data_pdu + bytes(nvme_get_log_page_reply)[:16] + elif nvme_data_len == 1024 and nvme_logpage_offset == 0: + # nvme cli version: 2.x + nvme_get_log_page_reply = NVMeGetLogPage() + nvme_get_log_page_reply.genctr = self_conn.gen_cnt + nvme_get_log_page_reply.numrec = len(listeners) + + reply = pdu_reply + nvme_tcp_data_pdu+ nvme_get_log_page_reply + elif nvme_data_len % 1024 == 0: + # reply log pages + reply = pdu_reply + nvme_tcp_data_pdu + \ + self_conn.log_page[nvme_logpage_offset:nvme_logpage_offset+nvme_data_len] + self_conn.unsent_log_page_len -= nvme_data_len + if self_conn.unsent_log_page_len == 0: + self_conn.log_page = b'' + self_conn.property_set = False + self_conn.allow_listeners = [] + else: + self.logger.error("request log page lenghth error. It need to be 16 or n*1024") + return -1 + try: + conn.sendall(reply) + except BrokenPipeError: + self.logger.error("client disconnected unexpectedly.") + return -1 + self.logger.debug("reply get log page request.") + return 0 + + def reply_keep_alive(self, conn, data, cmd_id): + """Reply keep alive request.""" + + self.logger.debug("handle keep alive request.") + self_conn = self.conn_vals[conn.fileno()] + nvme_sgl = struct.unpack_from('<16B', data, 32) + nvme_sgl_desc_type = nvme_sgl[15] & 0xF0 + nvme_sgl_desc_sub_type = nvme_sgl[15] & 0x0F + nvme_keep_alive_dword10 = struct.unpack_from('= \ + self.conn_vals[key].keep_alive_timeout / 1000: + # Adding locks to prevent another thread from processing sudden requests. + # Is there a better way? 
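+                # Note: this keep-alive scan runs in its own thread while the
+                # selector loop services requests, so the lock is what prevents
+                # both sides from touching a Connection while it is being closed.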
+ with self.lock: + self.logger.debug(f"discover request from {self.conn_vals[key].connection} timeout.") + self.selector.unregister(self.conn_vals[key].connection) + self.conn_vals[key].connection.close() + del self.conn_vals[key] + + time.sleep(1) + + def reply_fabric_request(self, conn, data, cmd_id): + """Reply fabric request.""" + + fabric_type = struct.unpack_from(' SQ_HEAD_MAX: + self_conn.sq_head_ptr = 1 + + if NVME_TCP_PDU(pdu_type) == NVME_TCP_PDU.ICREQ: + err = self.reply_initialize(conn) + + elif NVME_TCP_PDU(pdu_type) == NVME_TCP_PDU.CMD: + CMD1 = struct.unpack_from(' Date: Tue, 5 Sep 2023 17:02:33 +0800 Subject: [PATCH 02/10] update README for discovery service add the method of starting discovery service Signed-off-by: Yin Congmin --- README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README.md b/README.md index d3e6d3b9..f8b8441c 100644 --- a/README.md +++ b/README.md @@ -238,6 +238,15 @@ Once the NVMe-oF target is Hello NVMe-oF ``` +### Start Discovery Service(Optional) + +The discovery service can provide all the targets that the current user can access, and these target information is sourced from ceph omap. These targets may be running or just a record. + +1. Start Discovery Service + ```bash + $ python3 -m control.discovery + ``` + ## Advanced ### Configuration From deffe91c784fcc1515b1260ef455f7aa1f82670a Mon Sep 17 00:00:00 2001 From: Alexander Indenbaum Date: Fri, 25 Aug 2023 09:23:04 +0300 Subject: [PATCH 03/10] initial discovery - container - CI test Signed-off-by: Alexander Indenbaum --- .env | 8 +- .github/workflows/build-container.yml | 196 +++++++++++++++++++++++--- Makefile | 4 +- ceph-nvmeof.conf | 1 - docker-compose.yaml | 17 ++- mk/demo.mk | 2 + 6 files changed, 200 insertions(+), 28 deletions(-) diff --git a/.env b/.env index 787c843f..a7dbc404 100644 --- a/.env +++ b/.env @@ -24,7 +24,6 @@ NVMEOF_DESCRIPTION="Service to provide block storage on top of Ceph for platform NVMEOF_URL="https://github.com/ceph/ceph-nvmeof" NVMEOF_TAGS="ceph,nvme-of,nvme-of gateway,rbd,block storage" NVMEOF_WANTS="ceph,rbd" -NVMEOF_HOSTNAME="nvmeof" NVMEOF_IP_ADDRESS="192.168.13.3" NVMEOF_IO_PORT=4420 NVMEOF_GW_PORT=5500 @@ -61,4 +60,9 @@ RBD_IMAGE_NAME="demo_image" RBD_IMAGE_SIZE="10M" BDEV_NAME="demo_bdev" NQN="nqn.2016-06.io.spdk:cnode1" -NVMEOF_FIRST_GATEWAY_NAME="gw-1" +SERIAL="SPDK00000000000001" + +# Container names in docker compose environent +DISC1="ceph-nvmeof_discovery_1" +GW1="ceph-nvmeof_nvmeof_1" +GW2="ceph-nvmeof_nvmeof_2" diff --git a/.github/workflows/build-container.yml b/.github/workflows/build-container.yml index 5287c912..ebd58bed 100644 --- a/.github/workflows/build-container.yml +++ b/.github/workflows/build-container.yml @@ -181,7 +181,7 @@ jobs: run: | . .env - echo using gateway $NVMEOF_IP_ADDRESS port $NVMEOF_GW_PORT timeout ${{ env.WAIT_TIMEOUT_MINS }} + echo using gateway $NVMEOF_IP_ADDRESS port $NVMEOF_GW_PORT until nc -z $NVMEOF_IP_ADDRESS $NVMEOF_GW_PORT; do echo -n . sleep ${{ env.WAIT_INTERVAL_SECS }} @@ -243,21 +243,6 @@ jobs: path: | /tmp/coredump/core.* - #- name: Test mounting nvmeof device locally - # run: | - # . 
.env - # sudo modprobe nvme-fabrics - # sudo nvme list - # sudo nvme discover -t tcp -a $NVMEOF_IP_ADDRESS -s $NVMEOF_IO_PORT - # sudo nvme connect -t tcp --traddr $NVMEOF_IP_ADDRESS -s $NVMEOF_IO_PORT -n nqn.2016-06.io.spdk:cnode1 - # sudo nvme list - # NVMEOF_DEVICE=$(sudo nvme list -o json | jq '.Devices[] | select(.ModelNumber=="SPDK bdev Controller").DevicePath') - # sudo mkfs $NVMEOF_DEVICE - # MOUNT_POINT=$(mktemp -d) - # sudo mount $NVMEOF_DEVICE $MOUNT_POINT - # cd $MOUNT_POINT - # touch test - # For debugging purposes (provides an SSH connection to the runner) #- name: Setup tmate session # uses: mxschmitt/action-tmate@v3 @@ -274,18 +259,188 @@ jobs: make down make clean - push-to-registry: - if: github.event_name == 'release' && startsWith(github.ref, 'refs/tags/v') - needs: [pytest, demo] + discovery: + needs: build runs-on: ubuntu-latest + env: + HUGEPAGES: 768 # 3 spdk instances + steps: - name: Checkout code uses: actions/checkout@v3 + - name: Setup huge-pages + run: make setup HUGEPAGES=$HUGEPAGES + - name: Download container images uses: actions/download-artifact@v3 with: - name: images + name: ceph_nvmeof_container_images-${{ github.run_number }} + + - name: Load container images + run: | + docker load < nvmeof.tar + docker load < nvmeof-cli.tar + docker load < vstart-cluster.tar + docker load < bdevperf.tar + + - name: Start discovery controller + run: | + docker-compose up --detach discovery + + - name: Wait for discovery controller to be listening + timeout-minutes: 3 + run: | + . .env + container_ip() { + docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$1" + } + + ip=$(container_ip $DISC1) + echo using discovery controller $ip $NVMEOF_DISC_PORT + until nc -z $ip $NVMEOF_DISC_PORT; do + echo -n . + sleep ${{ env.WAIT_INTERVAL_SECS }} + done + + - name: Start gateway with scale=2 + run: | + docker-compose up --detach --scale nvmeof=2 nvmeof + + - name: Wait for gateways to be listening + timeout-minutes: 3 + run: | + . .env + container_ip() { + docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$1" + } + + for gw in $GW1 $GW2; do + ip=$(container_ip $gw) + echo using gateway $ip $NVMEOF_GW_PORT + until nc -z $ip $NVMEOF_GW_PORT; do + echo -n . + sleep ${{ env.WAIT_INTERVAL_SECS }} + done + echo + done + + - name: List containers + if: success() || failure() + run: | + docker-compose ps + + - name: List processes + if: success() || failure() + run: | + docker-compose top + + - name: Create RBD image + run: | + make rbd OPTS=-T + + - name: Set up target + run: | + . .env + + container_ip() { + docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$1" + } + # container id is the default hostname in docker environent + # i.e. 
default gateway name + container_id() { + docker ps -q -f name=$1 + } + cli_gw() { + gw=$1 + shift + docker-compose run --rm nvmeof-cli --server-address $gw --server-port $NVMEOF_GW_PORT "$@" + } + + gw1=$(container_ip $GW1) + echo ℹ️ Using GW RPC $GW1 address $gw1 port $NVMEOF_GW_PORT + cli_gw $gw1 get_subsystems + cli_gw $gw1 create_bdev --pool $RBD_POOL --image $RBD_IMAGE_NAME --bdev $BDEV_NAME + cli_gw $gw1 create_subsystem --subnqn $NQN --serial $SERIAL + cli_gw $gw1 add_namespace --subnqn $NQN --bdev $BDEV_NAME + for gw in $GW1 $GW2; do + ip=$(container_ip $gw) + name=$(container_id $gw) # default hostname - container id + echo ℹ️ Create listener address $ip gateway $name + cli_gw $ip create_listener --subnqn $NQN --gateway-name $name --traddr $ip --trsvcid $NVMEOF_IO_PORT + done + cli_gw $gw1 add_host --subnqn $NQN --host "*" + for gw in $GW1 $GW2; do + ip=$(container_ip $gw) + echo ℹ️ Subsystems for name $gw ip $ip + cli_gw $ip get_subsystems + done + + - name: Run bdevperf discovery + run: | + # See + # - https://github.com/spdk/spdk/blob/master/doc/jsonrpc.md + # - https://spdk.io/doc/nvmf_multipath_howto.html + . .env + container_ip() { + docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$1" + } + + echo -n "ℹ️ Starting bdevperf container" + make up SVC=bdevperf OPTS="--detach" + sleep 10 + echo "ℹ️ bdevperf start up logs" + make logs SVC=bdevperf + eval $(make run SVC=bdevperf OPTS="--entrypoint=env" | grep BDEVPERF_SOCKET | tr -d '\n\r' ) + ip=$(container_ip $DISC1) + rpc="/usr/libexec/spdk/scripts/rpc.py" + echo "ℹ️ bdevperf bdev_nvme_set_options" + make exec SVC=bdevperf OPTS=-T CMD="$rpc -v -s $BDEVPERF_SOCKET bdev_nvme_set_options -r -1" + echo "ℹ️ bdevperf start discovery ip: $ip port: $NVMEOF_DISC_PORT" + # -l -1 -o 10 + make exec SVC=bdevperf OPTS=-T CMD="$rpc -v -s $BDEVPERF_SOCKET bdev_nvme_start_discovery -b Nvme0 -t tcp -a $ip -s $NVMEOF_DISC_PORT -f ipv4 -w" + echo "ℹ️ bdevperf bdev_nvme_get_discovery_info" + make exec SVC=bdevperf OPTS=-T CMD="$rpc -v -s $BDEVPERF_SOCKET bdev_nvme_get_discovery_info" + echo "ℹ️ bdevperf perform_tests" + eval $(make run SVC=bdevperf OPTS="--entrypoint=env" | grep BDEVPERF_TEST_DURATION | tr -d '\n\r' ) + timeout=$(expr $BDEVPERF_TEST_DURATION \* 2) + bdevperf="/usr/libexec/spdk/scripts/bdevperf.py" + make exec SVC=bdevperf OPTS=-T CMD="$bdevperf -v -t $timeout -s $BDEVPERF_SOCKET perform_tests" + + - name: Check coredump existence + if: success() || failure() + id: check_coredumps + uses: andstor/file-existence-action@20b4d2e596410855db8f9ca21e96fbe18e12930b # v2, pinned to SHA for security reasons + with: + files: "/var/lib/systemd/coredump/*" + + - name: Upload demo core dumps + if: steps.check_coredumps.outputs.files_exists == 'true' + uses: actions/upload-artifact@v1 + with: + name: ceph_nvmeof_demo_cores-${{ github.run_number }} + path: /var/lib/systemd/coredump/* + + - name: Display logs + if: success() || failure() + run: make logs OPTS='' + + - name: Tear down + if: success() || failure() + run: | + make down + make clean + + push-to-registry: + if: github.event_name == 'release' && startsWith(github.ref, 'refs/tags/v') + needs: [pytest, demo, discovery] + runs-on: ubuntu-latest + + steps: + - name: Download container images + uses: actions/download-artifact@v3 + with: + name: ceph_nvmeof_container_images-${{ github.run_number }} - name: Load container images run: | @@ -302,4 +457,3 @@ jobs: - name: Publish nvmeof containers when release/tag is created run: | make push - diff --git a/Makefile 
b/Makefile index 1715f3c4..db01c41a 100644 --- a/Makefile +++ b/Makefile @@ -22,7 +22,7 @@ setup: ## Configure huge-pages (requires sudo/root password) @echo Actual Hugepages allocation: $$(cat $(HUGEPAGES_DIR)) @[ $$(cat $(HUGEPAGES_DIR)) -eq $(HUGEPAGES) ] -build pull logs: SVC ?= spdk bdevperf nvmeof nvmeof-devel nvmeof-cli ceph +build pull logs: SVC ?= spdk bdevperf nvmeof nvmeof-devel nvmeof-cli discovery ceph build: export NVMEOF_GIT_BRANCH != git name-rev --name-only HEAD build: export NVMEOF_GIT_COMMIT != git rev-parse HEAD @@ -34,7 +34,7 @@ build: export BUILD_DATE != date -u +"%Y-%m-%dT%H:%M:%SZ" up: ## Launch services up: SVC ?= ceph nvmeof ## Services up: OPTS ?= --abort-on-container-exit --exit-code-from $(SVC) --remove-orphans -up: override OPTS += --scale nvmeof=$(SCALE) +#up: override OPTS += --scale nvmeof=$(SCALE) clean: $(CLEAN) setup ## Clean-up environment clean: override HUGEPAGES = 0 diff --git a/ceph-nvmeof.conf b/ceph-nvmeof.conf index c895d6fc..f05cd065 100644 --- a/ceph-nvmeof.conf +++ b/ceph-nvmeof.conf @@ -20,7 +20,6 @@ state_update_interval_sec = 5 enable_discovery_controller = false [discovery] - addr = 0.0.0.0 port = 8009 debug = 20 diff --git a/docker-compose.yaml b/docker-compose.yaml index 7c518080..2a6828c2 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -99,16 +99,15 @@ services: NVMEOF_GIT_COMMIT: labels: io.ceph.nvmeof: - hostname: nvmeof volumes: # sudo bash -c 'echo 2048 > /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages' # https://spdk.io/doc/containers.html # TODO: Pending of https://github.com/spdk/spdk/issues/2973 - /dev/hugepages:/dev/hugepages - /dev/vfio/vfio:/dev/vfio/vfio - - ceph-conf:/etc/ceph:ro - $NVMEOF_CONFIG:/src/ceph-nvmeof.conf - /tmp/coredump:/tmp/coredump # core dump + - ceph-conf:/etc/ceph:ro cap_add: - SYS_ADMIN # huge-pages - CAP_SYS_NICE # RTE @@ -127,9 +126,23 @@ services: extends: service: nvmeof-base image: $QUAY_NVMEOF:$NVMEOF_VERSION + ports: + - "$NVMEOF_IO_PORT" # I/O controllers + - "$NVMEOF_GW_PORT" # Gateway + depends_on: + ceph: + condition: service_healthy + discovery: + extends: + service: nvmeof-base + image: $QUAY_NVMEOF:$NVMEOF_VERSION + ports: + - "$NVMEOF_DISC_PORT" # Discovery depends_on: ceph: condition: service_healthy + entrypoint: >- + python3 -m control.discovery # Used to update lockfile (make update-lockfile) nvmeof-builder-base: extends: diff --git a/mk/demo.mk b/mk/demo.mk index cbeaf12a..4f2f9c80 100644 --- a/mk/demo.mk +++ b/mk/demo.mk @@ -6,6 +6,8 @@ rbd: SVC = ceph rbd: CMD = bash -c "rbd -p $(RBD_POOL) info $(RBD_IMAGE_NAME) || rbd -p $(RBD_POOL) create $(RBD_IMAGE_NAME) --size $(RBD_IMAGE_SIZE)" # demo +# the fist gateway in docker enviroment, hostname defaults to container id +demo: export NVMEOF_HOSTNAME != docker ps -q -f name=ceph-nvmeof_nvmeof_1 demo: rbd ## Expose RBD_IMAGE_NAME as NVMe-oF target $(NVMEOF_CLI) create_bdev --pool $(RBD_POOL) --image $(RBD_IMAGE_NAME) --bdev $(BDEV_NAME) $(NVMEOF_CLI) create_subsystem --subnqn $(NQN) From ca21b78ff93727a506c2da9d9eac477d44d7f2ef Mon Sep 17 00:00:00 2001 From: Alexander Indenbaum Date: Wed, 30 Aug 2023 17:23:19 +0300 Subject: [PATCH 04/10] discovery: refactor configuration property Signed-off-by: Alexander Indenbaum --- control/discovery.py | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/control/discovery.py b/control/discovery.py index ff875720..b8506b85 100644 --- a/control/discovery.py +++ b/control/discovery.py @@ -127,8 +127,9 @@ class Connection: 
nvmeof_connect_data_hostnqn: str = field(default_factory=str) sq_head_ptr: int = 0 unsent_log_page_len: int = 0 - property_data: str = field(default_factory=str) - property_set: bool = False + # NVM ExpressTM Revision 1.4, page 47 + # see Figure 78: Offset 14h: CC – Controller Configuration + property_configuration: tuple = tuple((c_ubyte *8)()) shutdown_now: bool = False controller_id: uuid = None gen_cnt: int = 0 @@ -458,17 +459,13 @@ def reply_fc_cmd_prop_get(self, conn, data, cmd_id): # b'\x00\x00\x46\x00\x00\x00\x00\x00' # 0x46: IO Submission Queue Entry Size: 0x6 (64 bytes) # IO Completion Queue Entry Size: 0x4 (16 bytes) - property_data = (c_ubyte * 8)(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00) - if len(self_conn.property_data) == 8: - property_data = (c_ubyte * 8)(*self_conn.property_data) - property_get.property_data = property_data + property_get.property_data = self_conn.property_configuration elif NVME_CTL(nvmeof_prop_get_set_offset) == NVME_CTL.STATUS: - property_data = (c_ubyte * 8)(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00) - if len(self_conn.property_data) == 8: - property_data = (c_ubyte * 8)(*self_conn.property_data) - shutdown_notification = ((property_data[1]) >> 6) & 0x3 + shutdown_notification = (self_conn.property_configuration[1] >> 6) & 0x3 if shutdown_notification == 0: - if self_conn.property_set is True: + # check CC.EN bit + enabled = self_conn.property_configuration[0] & 0x1 + if enabled != 0: # controller status: ready property_get.property_data = (c_ubyte * 8)(0x01, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00) @@ -510,8 +507,7 @@ def reply_fc_cmd_prop_set(self, conn, data, cmd_id): if NVME_CTL(nvmeof_prop_get_set_offset) == NVME_CTL.CAPABILITIES: self.logger.error("property setting of capabilities is not supported.") elif NVME_CTL(nvmeof_prop_get_set_offset) == NVME_CTL.CONFIGURATION: - self_conn.property_data = struct.unpack_from('<8B', data, 56) - self_conn.property_set = True + self_conn.property_configuration = struct.unpack_from('<8B', data, 56) elif NVME_CTL(nvmeof_prop_get_set_offset) == NVME_CTL.STATUS: self.logger.error("property setting of status is not supported.") elif NVME_CTL(nvmeof_prop_get_set_offset) == NVME_CTL.VERSION: @@ -811,7 +807,6 @@ def reply_get_log_page(self, conn, data, cmd_id): self_conn.unsent_log_page_len -= nvme_data_len if self_conn.unsent_log_page_len == 0: self_conn.log_page = b'' - self_conn.property_set = False self_conn.allow_listeners = [] else: self.logger.error("request log page lenghth error. 
It need to be 16 or n*1024") From 7bfa6886f5d19537c05764f305f54c1601c99471 Mon Sep 17 00:00:00 2001 From: Alexander Indenbaum Date: Wed, 30 Aug 2023 17:27:38 +0300 Subject: [PATCH 05/10] get log page: spdk/bdevperf compatability Signed-off-by: Alexander Indenbaum --- control/discovery.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/control/discovery.py b/control/discovery.py index b8506b85..6364f968 100644 --- a/control/discovery.py +++ b/control/discovery.py @@ -686,9 +686,12 @@ def reply_get_log_page(self, conn, data, cmd_id): nvme_nsid = struct.unpack_from(' Date: Wed, 30 Aug 2023 18:07:03 +0300 Subject: [PATCH 06/10] discover: get log page unaligned memory access Signed-off-by: Alexander Indenbaum --- control/discovery.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/control/discovery.py b/control/discovery.py index 6364f968..7d8cd6a2 100644 --- a/control/discovery.py +++ b/control/discovery.py @@ -694,7 +694,9 @@ def reply_get_log_page(self, conn, data, cmd_id): nvme_sgl_len = nvme_sgl[8] + (nvme_sgl[9] << 8) + (nvme_sgl[10] << 16) + (nvme_sgl[11] << 24) nvme_get_logpage_dword10 = struct.unpack_from(' Date: Tue, 5 Sep 2023 18:42:54 +0300 Subject: [PATCH 07/10] README.md: discovery service container in docker-compose environment Signed-off-by: Alexander Indenbaum --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index f8b8441c..549e2563 100644 --- a/README.md +++ b/README.md @@ -247,6 +247,11 @@ The discovery service can provide all the targets that the current user can acce $ python3 -m control.discovery ``` +2. To start discovery service container in docker-compose environment + ```bash + $ docker-compose up --detach discovery + ``` + ## Advanced ### Configuration From ebdaa2f92f2d00ad0fcec4369deb21de71c34d12 Mon Sep 17 00:00:00 2001 From: Yin Congmin Date: Wed, 6 Sep 2023 16:27:14 +0800 Subject: [PATCH 08/10] README.md: discover target from discovery service Signed-off-by: Yin Congmin --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 549e2563..4ec10b49 100644 --- a/README.md +++ b/README.md @@ -252,6 +252,11 @@ The discovery service can provide all the targets that the current user can acce $ docker-compose up --detach discovery ``` +3. Discover targets from discovery service. The default port is 8009. 
+   ```bash
+   $ nvme discover -t tcp -a 192.168.13.3 -s 8009
+   ```
+
 ## Advanced
 
 ### Configuration

From ede30654fa05f2edcbd32729adc1d9988d1a0fee Mon Sep 17 00:00:00 2001
From: Yin Congmin
Date: Wed, 6 Sep 2023 16:42:34 +0800
Subject: [PATCH 09/10] discovery: update class Connection data type

Signed-off-by: Yin Congmin
---
 control/discovery.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/control/discovery.py b/control/discovery.py
index 7d8cd6a2..13606376 100644
--- a/control/discovery.py
+++ b/control/discovery.py
@@ -121,10 +121,10 @@ class Connection:
     allow_listeners: list = field(default_factory=list)
     log_page: bytearray = field(default_factory=bytearray)
     recv_buffer: bytearray = field(default_factory=bytearray)
-    nvmeof_connect_data_hostid: str = field(default_factory=str)
+    nvmeof_connect_data_hostid: tuple = tuple((c_ubyte *16)())
     nvmeof_connect_data_cntlid: int = 0
-    nvmeof_connect_data_subnqn: str = field(default_factory=str)
-    nvmeof_connect_data_hostnqn: str = field(default_factory=str)
+    nvmeof_connect_data_subnqn: tuple = tuple((c_ubyte *256)())
+    nvmeof_connect_data_hostnqn: tuple = tuple((c_ubyte *256)())
     sq_head_ptr: int = 0
     unsent_log_page_len: int = 0
     # NVM ExpressTM Revision 1.4, page 47

From d65c6b2e865cc2732a2cea2e594bed4286461718 Mon Sep 17 00:00:00 2001
From: Yin Congmin
Date: Wed, 6 Sep 2023 16:58:35 +0800
Subject: [PATCH 10/10] discovery: merge code block in reply_get_log_page()

Signed-off-by: Yin Congmin
---
 control/discovery.py | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/control/discovery.py b/control/discovery.py
index 13606376..d91ee23d 100644
--- a/control/discovery.py
+++ b/control/discovery.py
@@ -795,20 +795,12 @@ def reply_get_log_page(self, conn, data, cmd_id):
         nvme_tcp_data_pdu.data_length = nvme_data_len
 
         # reply based on the received get log page request packet(length)
-        if nvme_data_len < 1024:
-            # nvme cli version: 1.x
+        if nvme_data_len <= 1024 and nvme_logpage_offset == 0:
            nvme_get_log_page_reply = NVMeGetLogPage()
            nvme_get_log_page_reply.genctr = self_conn.gen_cnt
            nvme_get_log_page_reply.numrec = len(listeners)

            reply = pdu_reply + nvme_tcp_data_pdu + bytes(nvme_get_log_page_reply)[:nvme_data_len]
-        elif nvme_data_len == 1024 and nvme_logpage_offset == 0:
-            # nvme cli version: 2.x
-            nvme_get_log_page_reply = NVMeGetLogPage()
-            nvme_get_log_page_reply.genctr = self_conn.gen_cnt
-            nvme_get_log_page_reply.numrec = len(listeners)
-
-            reply = pdu_reply + nvme_tcp_data_pdu+ nvme_get_log_page_reply
         elif nvme_data_len % 1024 == 0:
             # reply log pages
             reply = pdu_reply + nvme_tcp_data_pdu + \