Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ newtype_derive = "0.1.6"
newtype-uuid = { version = "1.0.1", features = [ "v4" ] }
owo-colors = "4"
oxide-tokio-rt = "0.1.2"
paste = "1.0.15"
pin-project-lite = "0.2.13"
proc-macro2 = "1.0"
proc-macro-error = "1"
Expand Down
1 change: 1 addition & 0 deletions lib/propolis/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ propolis_types.workspace = true
usdt = { workspace = true, features = ["asm"] }
tokio = { workspace = true, features = ["full"] }
futures.workspace = true
paste.workspace = true
pin-project-lite.workspace = true
anyhow.workspace = true
rgb_frame.workspace = true
Expand Down
42 changes: 24 additions & 18 deletions lib/propolis/src/block/attachment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,17 @@ use std::future::Future;
use std::marker::PhantomPinned;
use std::num::NonZeroUsize;
use std::pin::Pin;
use std::sync::atomic::{AtomicU32, AtomicUsize, Ordering};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, Condvar, Mutex, MutexGuard, Weak};
use std::task::{Context, Poll};

use super::minder::{NoneInFlight, QueueMinder};
use super::{
devq_id, probes, DeviceId, DeviceInfo, DeviceQueue, DeviceRequest,
MetricConsumer, QueueId, WorkerId,
devq_id, probes, BackendId, DeviceId, DeviceInfo, DeviceQueue,
DeviceRequest, MetricConsumer, QueueId, WorkerId,
};
use crate::accessors::MemAccessor;
use crate::block;

use futures::stream::FuturesUnordered;
use futures::Stream;
Expand All @@ -39,12 +40,6 @@ use thiserror::Error;
use tokio::sync::futures::Notified;
use tokio::sync::Notify;

/// Static for generating unique block [DeviceId]s within a process
///
/// Numbering across block devices means that a block `DeviceId` and the queue
/// ID in a block attachment are unique across a VM.
static NEXT_DEVICE_ID: AtomicU32 = AtomicU32::new(0);

pub const MAX_WORKERS: NonZeroUsize = NonZeroUsize::new(64).unwrap();

pub type ReqCountHint = Option<NonZeroUsize>;
Expand Down Expand Up @@ -381,6 +376,7 @@ impl AttachPair {
return Err(AttachError::BackendAttached);
}

probes::block_attach!(|| (dev.device_id().0, be.backend_id().0));
// TODO: name the accessor child?
let be_acc_mem = dev.0.acc_mem.child(None);
be.0.workers.attach(&be_acc_mem, &dev.0.queues);
Expand Down Expand Up @@ -431,6 +427,7 @@ impl AttachPair {
return;
}
}
probes::block_detach!(|| (dev.device_id.0, be.backend_id.0));
*dev_state = None;
*be_state = None;

Expand Down Expand Up @@ -458,6 +455,7 @@ struct DeviceAttachInner {
dev_state: Mutex<DeviceState>,
queues: Arc<QueueCollection>,
acc_mem: MemAccessor,
device_id: block::DeviceId,
}

/// Main "attachment point" for a block device.
Expand All @@ -467,13 +465,14 @@ impl DeviceAttachment {
/// queues which the device will ever expose is set via `max_queues`. DMA
/// done by attached backend workers will be through the provided `acc_mem`.
pub fn new(max_queues: NonZeroUsize, acc_mem: MemAccessor) -> Self {
let devid = NEXT_DEVICE_ID.fetch_add(1, Ordering::Relaxed);
let queues = QueueCollection::new(max_queues, devid);
let device_id = DeviceId::new();
let queues = QueueCollection::new(max_queues, device_id);
Self(Arc::new(DeviceAttachInner {
att_state: Mutex::new(None),
dev_state: Mutex::new(DeviceState::default()),
queues,
acc_mem,
device_id,
}))
}

Expand Down Expand Up @@ -560,8 +559,9 @@ impl DeviceAttachment {
}

pub fn device_id(&self) -> DeviceId {
self.0.queues.devid
self.0.device_id
}

/// Get the maximum queues configured for this device.
pub fn max_queues(&self) -> NonZeroUsize {
NonZeroUsize::new(self.0.queues.queues.len())
Expand Down Expand Up @@ -678,9 +678,9 @@ impl WorkerSlot {
};

state.sleeping_on = Some(devid);
probes::block_sleep!(|| { (devid, self.id as u64) });
probes::block_sleep!(|| { (devid.0, self.id as u64) });
state = self.cv.wait(state).unwrap();
probes::block_wake!(|| { (devid, self.id as u64) });
probes::block_wake!(|| { (devid.0, self.id as u64) });
state.sleeping_on = None;
}
}
Expand Down Expand Up @@ -720,13 +720,13 @@ impl WorkerSlot {
devid: DeviceId,
) {
state.sleeping_on = Some(devid);
probes::block_sleep!(|| { (devid, self.id as u64) });
probes::block_sleep!(|| { (devid.0, self.id as u64) });
}

fn async_stop_sleep(&self) {
let mut state = self.state.lock().unwrap();
if let Some(devid) = state.sleeping_on.take() {
probes::block_wake!(|| { (devid, self.id as u64) });
probes::block_wake!(|| { (devid.0, self.id as u64) });
}
}

Expand Down Expand Up @@ -967,13 +967,13 @@ impl WorkerCollection {
}
fn assignments_refresh(&self, mut state: MutexGuard<WorkerColState>) {
let assign = state.generate_assignments();
let devid = state.device_id.unwrap_or(u32::MAX);
let devid = state.device_id.unwrap_or(block::DeviceId::INVALID);
drop(state);

super::probes::block_strategy!(|| {
let assign_name: &'static str = assign.strategy.get().into();
let generation = assign.strategy.generation() as u64;
(devid, assign_name, generation)
(devid.0, assign_name, generation)
});
for slot in self.workers.iter() {
slot.update_assignment(&assign);
Expand Down Expand Up @@ -1155,6 +1155,7 @@ struct BackendAttachInner {
att_state: Mutex<Option<(AttachPair, MemAccessor)>>,
workers: Arc<WorkerCollection>,
info: DeviceInfo,
backend_id: BackendId,
}

/// Main "attachment point" for a block backend.
Expand All @@ -1165,6 +1166,7 @@ impl BackendAttachment {
att_state: Mutex::new(None),
workers: WorkerCollection::new(max_workers),
info,
backend_id: BackendId::new(),
}))
}
/// Get an (inactive) [context](InactiveWorkerCtx) for a given [WorkerId].
Expand All @@ -1182,6 +1184,10 @@ impl BackendAttachment {
self.0.info
}

pub fn backend_id(&self) -> BackendId {
self.0.backend_id
}

/// Permit workers to pull requests from the attached device (if any) for
/// processing.
pub fn start(&self) {
Expand Down
3 changes: 2 additions & 1 deletion lib/propolis/src/block/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,13 +209,14 @@ impl FileBackend {
}))
}
fn spawn_workers(&self) -> std::io::Result<()> {
let backend_id = self.block_attach.backend_id().0;
let spawn_results = (0..self.worker_count.get())
.map(|n| {
let shared_state = self.state.clone();
let wctx = self.block_attach.worker(n as WorkerId);

std::thread::Builder::new()
.name(format!("file worker {n}"))
.name(format!("file backend {backend_id}/worker {n}"))
.spawn(move || {
let wctx = wctx
.activate_sync()
Expand Down
47 changes: 47 additions & 0 deletions lib/propolis/src/block/id.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

//! Runtime identifiers for block devices and their backends.
//!
//! Devices that support block backends and the block backends themselves are
//! identified independently; a device ID and a backend ID only become related
//! when an item of each type is connected via [`block::attach`].
//!
//! Devices in particular may have multiple identifiers, some from this module
//! and some from others. As one example, [`propolis::hw::nvme::NvmeCtrl`] has a
//! `device_id` distinguishing *instances of the NVMe controller* across a VM,
//! while the `PciNvme` which has an NVMe controller also has `block_attach`
//! with a `device_id` distinguishing *instances of block devices* across a VM.
//!
//! ## Limitations
//!
//! A consumer of `propolis` is free to construct devices supporting block
//! backends in any order, and may construct block backends in a different,
//! equally arbitrary order. Attaching the two kinds of item together is also
//! up to the consumer of `propolis`, and there is no requirement that a
//! particular block backend be connected to a particular device.
//!
//! Consequently, these identifiers are not stable for use in migration of a VM,
//! and must not be used in a way visible to a VM. They are unsuitable for
//! emulated device serial numbers, model numbers, etc. The destination
//! `propolis` may construct the same set of devices in a different order,
//! resulting in different run-time identifiers for a device at the same
//! location.
Comment on lines +17 to +30
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we provide any way for a consumer (such as a D script) to establish a mapping from these IDs to externally knowable objects? I see that the DTrace `block_attach` probe tells you the device ID and the backend ID. Would we want a way to associate those with an emulated device that can, in turn, be associated with a guest-facing thing? Or is that out of scope?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`block_attach` is the only way so far, but I figure this is really part of a #482-y future. That could include more general stats about an entity plus these kinds of IDs. The dirty secret is that, as much as I'm warning against it here, we're really just testing on all-NVMe VMs, knowing that Propolis maps NVMe 0 to device attachment 0 to file backend 0, and that we create devices in the order listed in the spec... :)


use crate::util::id::define_id;

define_id! {
/// Runtime identifier for a device that supports block backends.
///
/// Numbering across block devices means that a block `DeviceId` and the
/// queue ID in a block attachment are unique across a VM.
///
/// The inner `u32` is visible crate-wide so that it can be handed out as a
/// raw integer, e.g. as a DTrace probe argument.
#[derive(Copy, Clone)]
pub struct DeviceId(pub(crate) u32);
}

define_id! {
/// Runtime identifier for a block backend.
///
/// Block backends are numbered distinctly across a VM, but may not
/// be created in the same order as devices. The `block_attach` probe fires
/// when a `DeviceId` and `BackendId` become associated; the `block_detach`
/// probe carries the same pair of IDs.
///
/// The inner `u32` is visible crate-wide so that it can be handed out as a
/// raw integer, e.g. as a DTrace probe argument.
#[derive(Copy, Clone)]
pub struct BackendId(pub(crate) u32);
}
9 changes: 7 additions & 2 deletions lib/propolis/src/block/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ use std::time::Duration;
use crate::common::*;
use crate::vmm::{MemCtx, SubMapping};

mod id;
pub use id::{BackendId, DeviceId};

mod file;
pub use file::FileBackend;

Expand Down Expand Up @@ -41,6 +44,9 @@ pub const DEFAULT_BLOCK_SIZE: u32 = 512;

#[usdt::provider(provider = "propolis")]
mod probes {
fn block_attach(dev_id: u32, backend_id: u32) {}
fn block_detach(dev_id: u32, backend_id: u32) {}

fn block_begin_read(devq_id: u64, req_id: u64, offset: u64, len: u64) {}
fn block_begin_write(devq_id: u64, req_id: u64, offset: u64, len: u64) {}
fn block_begin_flush(devq_id: u64, req_id: u64) {}
Expand Down Expand Up @@ -181,12 +187,11 @@ impl From<QueueId> for u16 {
}
}

pub type DeviceId = u32;
pub type WorkerId = usize;

/// Combine device and queue IDs into single u64 for probes
pub(crate) fn devq_id(dev: DeviceId, queue: QueueId) -> u64 {
((dev as u64) << 8) | (queue.0 as u64)
((dev.0 as u64) << 8) | (queue.0 as u64)
}

/// Block device operation request
Expand Down
20 changes: 13 additions & 7 deletions lib/propolis/src/hw/nvme/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
use std::convert::TryInto;
use std::mem::size_of;
use std::num::NonZeroUsize;
use std::sync::atomic::AtomicU32;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex, MutexGuard, Weak};

Expand All @@ -16,6 +15,7 @@ use crate::hw::ids::pci::{PROPOLIS_NVME_DEV_ID, VENDOR_OXIDE};
use crate::hw::ids::OXIDE_OUI;
use crate::hw::pci;
use crate::migrate::*;
use crate::util::id::define_id;
use crate::util::regmap::RegMap;
use crate::vmm::MemAccessed;

Expand All @@ -32,10 +32,16 @@ mod requests;
use bits::*;
use queue::{CompQueue, QueueId, SubQueue};

/// Static for generating unique NVMe device identifiers across a VM.
static NEXT_DEVICE_ID: AtomicU32 = AtomicU32::new(0);

type DeviceId = u32;
define_id! {
/// Identifier for which NVMe controller in the VM an operation is happening
/// on.
///
/// This is mostly useful for NVMe-related DTrace probes, where otherwise a
/// queue number or command ID may be ambiguous across distinct NVMe
/// controllers in a VM.
///
/// This numbers NVMe controllers and is a separate type from the block
/// layer's `block::DeviceId`, which numbers block device attachments.
/// `devq_id` packs this value into the bits above a queue ID to form a
/// single `u64`.
#[derive(Copy, Clone)]
pub struct DeviceId(u32);
}

#[usdt::provider(provider = "propolis")]
mod probes {
Expand All @@ -58,7 +64,7 @@ pub(crate) fn devq_id(dev: DeviceId, queue: QueueId) -> u64 {
static_assertions::const_assert!(QueueId::MAX <= u16::MAX);
}

((dev as u64) << 16) | (queue as u64)
((dev.0 as u64) << 16) | (queue as u64)
}

/// The max number of MSI-X interrupts we support
Expand Down Expand Up @@ -885,7 +891,7 @@ impl PciNvme {
let csts = Status(0);

let state = NvmeCtrl {
device_id: NEXT_DEVICE_ID.fetch_add(1, Ordering::Relaxed),
device_id: DeviceId::new(),
ctrl: CtrlState { cap, cc, csts, ..Default::default() },
doorbell_buf: None,
msix_hdl: None,
Expand Down
Loading
Loading