// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

use super::*;

/// The local block factories for the block manager
///
/// This struct will construct the factories in a consistent order and can be
/// used as an intermediate step before creating the block pools.
///
/// This is useful for debugging and for testing.
// NOTE(review): `Dissolve` presumably yields the fields as a tuple in
// declaration order -- confirm against its documentation before reordering
// the fields below.
#[derive(Dissolve)]
pub struct LocalBlockDataFactories {
    /// Block set created for this worker; each constructed layout is
    /// registered in it in serialized form under its block-set index.
    block_set: NixlBlockSet,
    /// Factory for disk-backed blocks; `None` when no disk layout is
    /// configured or when no NIXL agent is available.
    disk_factory: Option<LocalBlockDataFactory<DiskStorage>>,
    /// Factory for pinned-host blocks; `None` when no host layout is
    /// configured.
    host_factory: Option<LocalBlockDataFactory<PinnedStorage>>,
    /// Factory for device blocks; `None` when no device layout is configured.
    device_factory: Option<LocalBlockDataFactory<DeviceStorage>>,
}

impl LocalBlockDataFactories {
    /// Construct the local block factories
    pub fn new(resources: &mut Resources) -> Result<Self> {
        let mut block_set = NixlBlockSet::new(resources.worker_id);
        let mut next_block_set_idx = 0;
        let layout_builder = resources.layout_builder();

        let device_factory = if let Some(config) = resources.config.device_layout.take() {
            next_block_set_idx += 1;
29
30
31

            let offload_filter = config.offload_filter.clone();

Ryan Olson's avatar
Ryan Olson committed
32
33
34
35
36
37
38
39
40
41
42
            tracing::debug!("Constructing device pool.");
            let layout = create_layout(
                layout_builder.clone(),
                config,
                resources.nixl_agent.as_ref().as_ref(),
            )?;
            block_set.add_block_set(next_block_set_idx, layout.serialize()?);
            Some(LocalBlockDataFactory::new(
                layout,
                next_block_set_idx,
                resources.worker_id,
43
                offload_filter,
Ryan Olson's avatar
Ryan Olson committed
44
45
46
47
48
49
50
            ))
        } else {
            None
        };

        let host_factory = if let Some(config) = resources.config.host_layout.take() {
            next_block_set_idx += 1;
51
52
53

            let offload_filter = config.offload_filter.clone();

Ryan Olson's avatar
Ryan Olson committed
54
55
56
57
58
59
60
61
62
63
64
            tracing::debug!("Constructing host pool.");
            let layout = create_layout(
                layout_builder.clone(),
                config,
                resources.nixl_agent.as_ref().as_ref(),
            )?;
            block_set.add_block_set(next_block_set_idx, layout.serialize()?);
            Some(LocalBlockDataFactory::new(
                layout,
                next_block_set_idx,
                resources.worker_id,
65
                offload_filter,
Ryan Olson's avatar
Ryan Olson committed
66
67
68
69
70
71
            ))
        } else {
            None
        };

        let disk_factory = if let Some(config) = resources.config.disk_layout.take() {
72
73
            let offload_filter = config.offload_filter.clone();

Ryan Olson's avatar
Ryan Olson committed
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
            if resources.nixl_agent.is_none() {
                tracing::warn!("NIXL is disabled; will not allocate disk blocks.");
                None
            } else {
                next_block_set_idx += 1;
                tracing::debug!("Constructing disk pool.");
                let layout = create_layout(
                    layout_builder.clone(),
                    config,
                    resources.nixl_agent.as_ref().as_ref(),
                )?;
                block_set.add_block_set(next_block_set_idx, layout.serialize()?);
                Some(LocalBlockDataFactory::new(
                    layout,
                    next_block_set_idx,
                    resources.worker_id,
90
                    offload_filter,
Ryan Olson's avatar
Ryan Olson committed
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
                ))
            }
        } else {
            None
        };

        Ok(Self {
            block_set,
            disk_factory,
            host_factory,
            device_factory,
        })
    }
}

/// Build a layout for storage type `S` from a layout builder and a config.
///
/// The builder is given `config.num_blocks` and built first. Then:
///
/// * a config carrying a logical layout is rejected;
/// * if `config.storage` is set, the layout is created over that storage;
/// * otherwise, if `config.allocator` is set, the layout is allocated through
///   it;
/// * if neither is set, an error is returned.
///
/// When `nixl_agent` is `Some`, the new layout is registered with that agent
/// before being returned.
///
/// # Errors
///
/// Propagates failures from building the layout config, from creating or
/// allocating the layout, and from NIXL registration.
fn create_layout<S: Storage + NixlRegisterableStorage>(
    mut builder: LayoutConfigBuilder,
    config: KvManagerLayoutConfig<S>,
    nixl_agent: Option<&NixlAgent>,
) -> Result<Arc<dyn NixlLayout<StorageType = S>>> {
    let layout_config = builder.num_blocks(config.num_blocks).build()?;

    if config.logical.is_some() {
        anyhow::bail!("Logical layouts are not supported by the local builder");
    }

    // Storage takes precedence over an allocator when both are supplied.
    match (config.storage, config.allocator) {
        (Some(storage), _) => {
            let mut built = layout_config.create_layout(config.layout_type, storage)?;
            if let Some(agent) = nixl_agent {
                built.nixl_register(agent, None)?;
            }
            Ok(built.into())
        }
        (None, Some(allocator)) => {
            let mut built = layout_config.allocate_layout(config.layout_type, allocator)?;
            if let Some(agent) = nixl_agent {
                built.nixl_register(agent, None)?;
            }
            Ok(built.into())
        }
        (None, None) => anyhow::bail!("failed to create layout"),
    }
}