network.py 2.9 KB
Newer Older
1
2
3
4
5
6
7
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""Network Utility."""

import socket
import re
8
import os
9
from pathlib import Path
10
from superbench.common.utils import logger
11
12
13
14
15
16
17
18
19
20


def get_free_port():
    """Get a free port in current system.

    A TCP socket is bound to port 0 on the loopback address so that the OS
    picks an unused port; the chosen port number is read back and the socket
    is closed before returning. Because the socket is closed, the port is only
    known to have been free at the time of the call.

    Return:
        port (int): a free port in current system, or None if binding or
            querying the socket raises OSError.
    """
    # The context manager closes the socket on every exit path, which replaces
    # the explicit try/finally close of the socket.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        try:
            # Port 0 asks the OS to choose any currently unused port.
            sock.bind(('127.0.0.1', 0))
            # NOTE(review): SO_REUSEADDR is set after bind(), so it presumably has
            # no effect on the bind above; the call order is kept unchanged.
            sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            # getsockname() returns (host, port) for AF_INET sockets.
            return sock.getsockname()[1]
        except OSError:
            return None


def _natural_sort_key(name):
    """Build a sort key that orders embedded digit runs numerically (e.g. 'mlx5_2' before 'mlx5_10')."""
    return [int(part) if part.isdigit() else part for part in re.split(r'(\d+)', name)]


def get_ib_devices():
    """Get available IB devices with available ports in the system and filter ethernet devices.

    The IB_DEVICES environment variable, when set and non-empty, is parsed as a
    comma-separated list (surrounding whitespace and empty entries are ignored):

    * all entries are device names: the names are returned as-is, without
      inspecting the system;
    * all entries are numeric indices: the indices are not applied here, the
      full list of discovered devices is returned (presumably the caller
      indexes into it - verify against callers);
    * a mix of both: a ValueError is raised through logger.log_and_raise.

    Otherwise devices are discovered under /sys/class/infiniband, keeping only
    ports whose link_layer is 'InfiniBand'.

    Return:
        ib_devices_port (list): IB devices with available ports in current system.
            Discovered entries have the form '<device>:<port>[,<port>...]' and are
            naturally sorted by device name; entries taken from IB_DEVICES are the
            bare names given in the variable.

    Raises:
        ValueError: if IB_DEVICES mixes numeric indices and device names.
    """
    ib_devices_env_raw = os.getenv('IB_DEVICES', None)
    if ib_devices_env_raw:
        # Strip whitespace and drop empty entries (e.g. from a trailing comma) so that
        # the returned names are clean and blanks are not mistaken for device names.
        ib_devices_env = [device.strip() for device in ib_devices_env_raw.split(',')]
        ib_devices_env = [device for device in ib_devices_env if device]

        # Validate that IB_DEVICES contains either all
        # numeric indices or all device names, not mixed
        numeric_flags = [device.isdigit() for device in ib_devices_env]
        all_numeric = all(numeric_flags)
        any_numeric = any(numeric_flags)

        # Check for mixed case (some numeric, some not)
        if any_numeric and not all_numeric:
            logger.log_and_raise(
                exception=ValueError,
                msg='IB_DEVICES contains mixed numeric indices and device names: {}. '
                'All values must be either numeric indices (e.g., "0,2,4,6") '
                'or device names (e.g., "mlx5_ib0,mlx5_ib2").'.format(ib_devices_env_raw)
            )

        # If all numeric (or nothing usable was given), fall through to discover
        # actual devices; otherwise use the provided names directly.
        if ib_devices_env and not all_numeric:
            return ib_devices_env

    ib_root = Path('/sys/class/infiniband')
    ib_devices_port_dict = {}
    for device_dir in ib_root.glob('*'):
        ports_dir = device_dir / 'ports'
        ports = sorted((p.name for p in ports_dir.glob('*')), key=_natural_sort_key)
        # Filter 'InfiniBand' ports by link_layer; this excludes Ethernet (RoCE) ports.
        ib_ports = [
            port for port in ports if (ports_dir / port / 'link_layer').read_text().strip() == 'InfiniBand'
        ]
        # Devices without any InfiniBand port are left out entirely.
        if ib_ports:
            ib_devices_port_dict[device_dir.name] = ib_ports

    return [
        device + ':' + ','.join(ib_devices_port_dict[device])
        for device in sorted(ib_devices_port_dict, key=_natural_sort_key)
    ]