KVM: x86: Add platform hooks for private memory invalidations
Also issue the invalidations at folio granularity to provide a future
path toward handling private/shared transitions at hugepage granularity,
for things like restoring the 2M directmap and hugetlbfs support.

Also pass a struct page so the invalidated pages can be fixed up as
needed by the platform before they are freed.

Also issue invalidations of all allocated pages during notifier
unregistration so that the pages are not left in an unusable state when
they eventually get freed back to the host upon FD release.

Signed-off-by: Michael Roth <michael.roth@amd.com>
mdroth committed Nov 28, 2022
1 parent 8df300b commit d669c7d
Showing 6 changed files with 72 additions and 13 deletions.
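
The diffs below add the hook plumbing; to see why the commit message stresses folio granularity, consider what a platform hook might do with it. The following is a hypothetical sketch only, not code from this commit: kvm_restricted_gfn_to_pfn() is an assumed helper for resolving the pfn backing a gfn in a restricted memslot, and a real implementation would be wired through kvm_x86_ops as shown further down.

/*
 * Hypothetical platform hook: walk the invalidated range one folio at
 * a time so a hugepage-backed range (e.g. a 2M folio whose directmap
 * entry needs restoring) can be fixed up in a single step before the
 * pages are freed back to the host.
 */
static void example_invalidate_restricted_mem(struct kvm_memory_slot *slot,
					      gfn_t start, gfn_t end)
{
	gfn_t gfn = start;

	while (gfn < end) {
		struct folio *folio;
		kvm_pfn_t pfn;

		/* assumed helper, not a real API from this series */
		if (kvm_restricted_gfn_to_pfn(slot, gfn, &pfn)) {
			gfn++;
			continue;
		}

		folio = pfn_folio(pfn);
		/* platform-specific fixup of the backing pages goes here */
		gfn += folio_nr_pages(folio);
	}
}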
1 change: 1 addition & 0 deletions arch/x86/include/asm/kvm-x86-ops.h
@@ -132,6 +132,7 @@ KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
 KVM_X86_OP_OPTIONAL_RET0(private_mem_enabled);
 KVM_X86_OP_OPTIONAL_RET0(fault_is_private);
 KVM_X86_OP_OPTIONAL_RET0(update_mem_attr)
+KVM_X86_OP_OPTIONAL(invalidate_restricted_mem)
 
 #undef KVM_X86_OP
 #undef KVM_X86_OP_OPTIONAL
1 change: 1 addition & 0 deletions arch/x86/include/asm/kvm_host.h
@@ -1572,6 +1572,7 @@ struct kvm_x86_ops {
 	int (*fault_is_private)(struct kvm *kvm, gpa_t gpa, u64 error_code, bool *private_fault);
 	int (*update_mem_attr)(struct kvm_memory_slot *slot, unsigned int attr,
 			       gfn_t start, gfn_t end);
+	void (*invalidate_restricted_mem)(struct kvm_memory_slot *slot, gfn_t start, gfn_t end);
 
 	bool (*has_wbinvd_exit)(void);

5 changes: 5 additions & 0 deletions arch/x86/kvm/mmu/mmu.c
@@ -7094,3 +7094,8 @@ void kvm_arch_update_mem_attr(struct kvm *kvm, struct kvm_memory_slot *slot,
 		pr_warn_ratelimited("Failed to update GFN range 0x%llx-0x%llx to %s. Ret: %d\n",
 				    start, end, (attr & KVM_MEM_ATTR_PRIVATE) ? "private" : "shared", ret);
 }
+
+void kvm_arch_invalidate_restricted_mem(struct kvm_memory_slot *slot, gfn_t start, gfn_t end)
+{
+	static_call_cond(kvm_x86_invalidate_restricted_mem)(slot, start, end);
+}
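
Because invalidate_restricted_mem is declared KVM_X86_OP_OPTIONAL and invoked via static_call_cond(), the call above patches down to a no-op on platforms that never set the hook. A vendor module that needs it would fill in the op roughly like this (sketch; the names are placeholders, not code from this commit):

static void example_invalidate_restricted_mem(struct kvm_memory_slot *slot,
					      gfn_t start, gfn_t end)
{
	/* platform-specific invalidation of the backing pages */
}

static struct kvm_x86_ops example_x86_ops __initdata = {
	/* ... other ops ... */
	.invalidate_restricted_mem = example_invalidate_restricted_mem,
};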
6 changes: 6 additions & 0 deletions include/linux/kvm_host.h
@@ -2305,13 +2305,19 @@ static inline void kvm_account_pgtable_pages(void *virt, int nr)
 #ifdef __KVM_HAVE_ARCH_UPDATE_MEM_ATTR
 void kvm_arch_update_mem_attr(struct kvm *kvm, struct kvm_memory_slot *slot,
 			      unsigned int attr, gfn_t start, gfn_t end);
+void kvm_arch_invalidate_restricted_mem(struct kvm_memory_slot *slot, gfn_t start, gfn_t end);
 #else
 static inline void kvm_arch_update_mem_attr(struct kvm *kvm,
 					    struct kvm_memory_slot *slot,
 					    unsigned int attr,
 					    gfn_t start, gfn_t end)
 {
 }
+
+static inline void kvm_arch_invalidate_restricted_mem(struct kvm_memory_slot *slot, gfn_t start,
+						      gfn_t end)
+{
+}
 #endif
 
 static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
67 changes: 54 additions & 13 deletions mm/restrictedmem.c
@@ -14,45 +14,75 @@ struct restrictedmem_data {
 	struct list_head notifiers;
 };
 
-static void restrictedmem_notifier_invalidate(struct restrictedmem_data *data,
-					      pgoff_t start, pgoff_t end, bool notify_start)
+static void restrictedmem_notifier_invalidate_start(struct restrictedmem_data *data,
+						    pgoff_t start, pgoff_t end)
 {
 	struct restrictedmem_notifier *notifier;
 
 	mutex_lock(&data->lock);
-	list_for_each_entry(notifier, &data->notifiers, list) {
-		if (notify_start)
+	list_for_each_entry(notifier, &data->notifiers, list)
 		notifier->ops->invalidate_start(notifier, start, end);
-		else
-			notifier->ops->invalidate_end(notifier, start, end);
-	}
+	mutex_unlock(&data->lock);
+}
+
+static void restrictedmem_notifier_invalidate_end(struct restrictedmem_data *data,
+						  pgoff_t start, pgoff_t end)
+{
+	struct restrictedmem_notifier *notifier;
+
+	mutex_lock(&data->lock);
+	list_for_each_entry(notifier, &data->notifiers, list)
+		notifier->ops->invalidate_end(notifier, start, end);
 	mutex_unlock(&data->lock);
 }
 
 static int restrictedmem_release(struct inode *inode, struct file *file)
 {
 	struct restrictedmem_data *data = inode->i_mapping->private_data;
 
+	pr_debug("%s: releasing memfd, invalidating page offsets 0x0-0x%llx\n",
+		 __func__, inode->i_size >> PAGE_SHIFT);
+	restrictedmem_notifier_invalidate_start(data, 0, inode->i_size >> PAGE_SHIFT);
+	restrictedmem_notifier_invalidate_end(data, 0, inode->i_size >> PAGE_SHIFT);
+
 	fput(data->memfd);
 	kfree(data);
 	return 0;
 }
 
+static long restrictedmem_deallocate(struct restrictedmem_data *data, int mode,
+				     loff_t start, loff_t len)
+{
+	struct file *memfd = data->memfd;
+	pgoff_t pg_start, pg_end;
+	int ret;
+
+	pg_start = start >> PAGE_SHIFT;
+	pg_end = (start + len) >> PAGE_SHIFT;
+	pg_end = min_t(pgoff_t, pg_end, memfd->f_inode->i_size >> PAGE_SHIFT);
+
+	restrictedmem_notifier_invalidate_start(data, pg_start, pg_end);
+	ret = memfd->f_op->fallocate(memfd, mode, start, len);
+	restrictedmem_notifier_invalidate_end(data, pg_start, pg_end);
+
+	return ret;
+}
+
 static long restrictedmem_fallocate(struct file *file, int mode,
 				    loff_t offset, loff_t len)
 {
 	struct restrictedmem_data *data = file->f_mapping->private_data;
 	struct file *memfd = data->memfd;
 	int ret;
 
-	if (mode & FALLOC_FL_PUNCH_HOLE) {
-		if (!PAGE_ALIGNED(offset) || !PAGE_ALIGNED(len))
-			return -EINVAL;
-	}
+	if (!PAGE_ALIGNED(offset) || !PAGE_ALIGNED(len))
+		return -EINVAL;
+
+	if (mode & FALLOC_FL_PUNCH_HOLE)
+		return restrictedmem_deallocate(data, mode, offset, len);
 
-	restrictedmem_notifier_invalidate(data, offset, offset + len, true);
 	ret = memfd->f_op->fallocate(memfd, mode, offset, len);
-	restrictedmem_notifier_invalidate(data, offset, offset + len, false);
 
 	return ret;
 }
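
Summarizing the flow above: a hole punch on a restricted memfd now converts the byte range to page offsets in restrictedmem_deallocate(), clamping the end offset to i_size, and brackets the underlying fallocate() with invalidate_start/invalidate_end notifications; unaligned ranges are rejected up front for all modes. For example, punching offset 0xff000, len 0x2000 in a 1 MiB memfd (i_size >> PAGE_SHIFT == 0x100) yields pg_start = 0xff and pg_end = 0x101, which min_t() clamps to 0x100, so notifiers never see offsets past EOF.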

@@ -219,6 +249,17 @@ void restrictedmem_unregister_notifier(struct file *file,
 				       struct restrictedmem_notifier *notifier)
 {
 	struct restrictedmem_data *data = file->f_mapping->private_data;
+	struct inode *inode = file_inode(data->memfd);
+
+	/* TODO: this will issue notifications to all registered notifiers,
+	 * but it's only the one being unregistered that needs to process
+	 * invalidations for any ranges still allocated at this point in
+	 * time. For now this relies on KVM currently being the only notifier.
+	 */
+	pr_debug("%s: unregistering notifier, invalidating page offsets 0x0-0x%llx\n",
+		 __func__, inode->i_size >> PAGE_SHIFT);
+	restrictedmem_notifier_invalidate_start(data, 0, inode->i_size >> PAGE_SHIFT);
+	restrictedmem_notifier_invalidate_end(data, 0, inode->i_size >> PAGE_SHIFT);
 
 	mutex_lock(&data->lock);
 	list_del(&notifier->list);
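
On the consumer side, the start/end pair follows the mmu_notifier convention: invalidate_start fires before the backing pages change, invalidate_end once the operation completes. A minimal consumer sketch, assuming restrictedmem_notifier_ops carries exactly the two callbacks invoked above (names hypothetical):

/* Minimal notifier consumer (hypothetical). */
static void example_invalidate_start(struct restrictedmem_notifier *notifier,
				     pgoff_t start, pgoff_t end)
{
	/* stop using / unmap the range before its pages are freed */
}

static void example_invalidate_end(struct restrictedmem_notifier *notifier,
				   pgoff_t start, pgoff_t end)
{
	/* safe to fault the range back in, or treat it as gone */
}

static const struct restrictedmem_notifier_ops example_notifier_ops = {
	.invalidate_start = example_invalidate_start,
	.invalidate_end = example_invalidate_end,
};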
5 changes: 5 additions & 0 deletions virt/kvm/kvm_main.c
@@ -1116,6 +1116,9 @@ static void kvm_restrictedmem_invalidate_begin(struct restrictedmem_notifier *notifier,
 						 &gfn_start, &gfn_end))
 		return;
 
+	pr_debug("%s: start: 0x%lx, end: 0x%lx, roffset: 0x%llx, gfn_start: 0x%llx, gfn_end: 0x%llx\n",
+		 __func__, start, end, slot->restricted_offset, gfn_start, gfn_end);
+
 	idx = srcu_read_lock(&kvm->srcu);
 	KVM_MMU_LOCK(kvm);

@@ -1130,6 +1133,8 @@ static void kvm_restrictedmem_invalidate_begin(struct restrictedmem_notifier *notifier,
 	if (kvm_unmap_gfn_range(kvm, &gfn_range))
 		kvm_flush_remote_tlbs(kvm);
 
+	kvm_arch_invalidate_restricted_mem(slot, gfn_start, gfn_end);
+
 	KVM_MMU_UNLOCK(kvm);
 	srcu_read_unlock(&kvm->srcu, idx);
 }
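
Note the ordering at this call site: the new arch hook runs with mmu_lock held, after kvm_unmap_gfn_range() and the remote TLB flush, so the platform fixes up pages only once they are unreachable through stage-2 mappings, and before restrictedmem frees them back to the host.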
