KVM: x86: Add platform hooks for private memory invalidations
Also issue the invalidations with folio granularity to provide a future
path toward handling private/shared transitions at hugepage granularity,
for things like restoring the 2M direct map and hugetlbfs support.

Also pass a struct page so the invalidated pages can be fixed up as
needed by the platform before they are freed.

Also issue invalidations of all allocated pages during notifier
unregistration so that the pages are not left in an unusable state when
they eventually get freed back to the host upon FD release.

Signed-off-by: Michael Roth <michael.roth@amd.com>
mdroth committed Oct 25, 2022
1 parent 938f2ed commit 127e5ea
Showing 7 changed files with 75 additions and 5 deletions.
1 change: 1 addition & 0 deletions arch/x86/include/asm/kvm-x86-ops.h
@@ -132,6 +132,7 @@ KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
 KVM_X86_OP_OPTIONAL_RET0(private_mem_enabled);
 KVM_X86_OP_OPTIONAL_RET0(fault_is_private);
 KVM_X86_OP_OPTIONAL_RET0(update_mem_attr)
+KVM_X86_OP_OPTIONAL(invalidate_private_range)
 
 #undef KVM_X86_OP
 #undef KVM_X86_OP_OPTIONAL
2 changes: 2 additions & 0 deletions arch/x86/include/asm/kvm_host.h
@@ -1565,6 +1565,8 @@ struct kvm_x86_ops {
 	int (*fault_is_private)(struct kvm *kvm, gpa_t gpa, u64 error_code, bool *private_fault);
 	int (*update_mem_attr)(struct kvm_memory_slot *slot, unsigned int attr,
 			       gfn_t start, gfn_t end);
+	void (*invalidate_private_range)(struct kvm_memory_slot *slot, gfn_t start, gfn_t end,
+					 struct page *page, int order);
 
 	bool (*has_wbinvd_exit)(void);

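The struct page argument is what lets a backend fix invalidated pages up before they are freed back to the host. As a rough sketch of the kind of implementation this hook anticipates — the function name and the snp_reclaim_page() helper below are hypothetical stand-ins, not part of this commit:

/*
 * Hypothetical backend for the new hook: walk each 4K subpage in the
 * invalidated range and return it to a host-usable state (for SNP this
 * would mean flipping it back to shared in the RMP table) before the
 * backing memory can be freed. snp_reclaim_page() is illustrative only.
 */
static void sev_invalidate_private_range(struct kvm_memory_slot *slot,
					 gfn_t start, gfn_t end,
					 struct page *page, int order)
{
	unsigned long i, npages = end - start;

	for (i = 0; i < npages; i++)
		snp_reclaim_page(page + i);
}

A backend opts in by setting .invalidate_private_range = sev_invalidate_private_range in its kvm_x86_ops; backends that leave the op NULL are unaffected, since it is declared KVM_X86_OP_OPTIONAL.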
6 changes: 6 additions & 0 deletions arch/x86/kvm/mmu/mmu.c
@@ -7056,6 +7056,12 @@ static void update_mem_lpage_info(struct kvm *kvm,
 	}
 }
 
+void kvm_arch_invalidate_private_range(struct kvm_memory_slot *slot, gfn_t start, gfn_t end,
+				       struct page *page, int order)
+{
+	static_call_cond(kvm_x86_invalidate_private_range)(slot, start, end, page, order);
+}
+
 void kvm_arch_update_mem_attr(struct kvm *kvm, unsigned int attr,
 			      gfn_t start, gfn_t end)
 {
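Since the op is optional, the arch wrapper above uses static_call_cond(), which compiles to a no-op when no backend installs the hook. Conceptually it is equivalent to the following guarded indirect call (plain C for illustration; the real mechanism is the kernel's static-call patching):

	if (kvm_x86_ops.invalidate_private_range)
		kvm_x86_ops.invalidate_private_range(slot, start, end, page, order);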
7 changes: 7 additions & 0 deletions include/linux/kvm_host.h
@@ -2298,11 +2298,18 @@ static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu)
 #ifdef __KVM_HAVE_ARCH_UPDATE_MEM_ATTR
 void kvm_arch_update_mem_attr(struct kvm *kvm, unsigned int attr,
 			      gfn_t start, gfn_t end);
+void kvm_arch_invalidate_private_range(struct kvm_memory_slot *slot, gfn_t start, gfn_t end,
+				       struct page *page, int order);
 #else
 static inline void kvm_arch_update_mem_attr(struct kvm *kvm, unsigned int attr,
 					    gfn_t start, gfn_t end)
 {
 }
+
+static inline void kvm_arch_invalidate_private_range(struct kvm_memory_slot *slot, gfn_t start,
+						     gfn_t end, struct page *page, int order)
+{
+}
 #endif
 
 static inline int kvm_private_mem_get_pfn(struct kvm_memory_slot *slot,
2 changes: 1 addition & 1 deletion include/linux/memfd.h
@@ -18,7 +18,7 @@ struct inaccessible_notifier;
 
 struct inaccessible_notifier_ops {
 	void (*invalidate)(struct inaccessible_notifier *notifier,
-			   pgoff_t start, pgoff_t end);
+			   pgoff_t start, pgoff_t end, struct page *page, int order);
 };
 
 struct inaccessible_notifier {
58 changes: 55 additions & 3 deletions mm/memfd_inaccessible.c
@@ -14,21 +14,57 @@ struct inaccessible_data {
 };
 
 static void inaccessible_notifier_invalidate(struct inaccessible_data *data,
-					     pgoff_t start, pgoff_t end)
+					     pgoff_t start, pgoff_t end, struct page *page,
+					     int order)
 {
 	struct inaccessible_notifier *notifier;
 
 	mutex_lock(&data->lock);
 	list_for_each_entry(notifier, &data->notifiers, list) {
-		notifier->ops->invalidate(notifier, start, end);
+		notifier->ops->invalidate(notifier, start, end, page, order);
 	}
 	mutex_unlock(&data->lock);
 }
 
+static void inaccessible_invalidate_range(struct inaccessible_data *data,
+					  pgoff_t start, pgoff_t end)
+{
+	struct file *memfd = data->memfd;
+	pgoff_t offset = start;
+	int ret;
+
+	while (offset < end) {
+		pgoff_t npages;
+		struct page *page;
+		int order;
+
+		ret = shmem_getpage(file_inode(memfd), offset, &page, SGP_NOALLOC);
+		if (ret) {
+			pr_debug_ratelimited("%s: ret: %d\n", __func__, ret);
+			offset++;
+			continue;
+		}
+
+		order = thp_order(compound_head(page));
+		npages = (1 << order) - (offset - ALIGN_DOWN(offset, 1 << order));
+		npages = min_t(pgoff_t, end - offset, npages);
+		unlock_page(page);
+
+		inaccessible_notifier_invalidate(data, offset, offset + npages, page, order);
+
+		put_page(page);
+		offset += npages;
+		cond_resched();
+	}
+}
+
 static int inaccessible_release(struct inode *inode, struct file *file)
 {
 	struct inaccessible_data *data = inode->i_mapping->private_data;
 
+	pr_debug("%s: releasing memfd, invalidating page offsets 0x0-0x%llx\n",
+		 __func__, inode->i_size >> PAGE_SHIFT);
+	inaccessible_invalidate_range(data, 0, inode->i_size >> PAGE_SHIFT);
 	fput(data->memfd);
 	kfree(data);
 	return 0;
@@ -44,10 +80,16 @@ static long inaccessible_fallocate(struct file *file, int mode,
 	if (mode & FALLOC_FL_PUNCH_HOLE) {
 		if (!PAGE_ALIGNED(offset) || !PAGE_ALIGNED(len))
 			return -EINVAL;
+
+		inaccessible_invalidate_range(data, offset >> PAGE_SHIFT,
+					      (offset + len) >> PAGE_SHIFT);
 	}
 
 	ret = memfd->f_op->fallocate(memfd, mode, offset, len);
-	inaccessible_notifier_invalidate(data, offset, offset + len);
+	if (ret)
+		pr_warn("%s: fallocate() failed, ret: %d, offset: 0x%llx, mode: %x\n", __func__,
+			ret, offset, mode);
+
 	return ret;
 }

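Userspace reaches inaccessible_invalidate_range() through the hole-punch path above. A minimal trigger, shown on a plain memfd purely for illustration (the series' actual FDs come from its memfd_restricted() syscall, not shown in this commit):

#define _GNU_SOURCE
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	int fd = memfd_create("demo", 0);

	ftruncate(fd, 4 << 20);		/* 4M backing file */
	/* A page-aligned hole punch is what drives the invalidation. */
	fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
		  0, 2 << 20);
	close(fd);
	return 0;
}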
@@ -179,6 +221,16 @@ void inaccessible_unregister_notifier(struct file *file,
 				      struct inaccessible_notifier *notifier)
 {
 	struct inaccessible_data *data = file->f_mapping->private_data;
+	struct inode *inode = file_inode(data->memfd);
+
+	/* TODO: this will issue notifications to all registered notifiers,
+	 * but it's only the one being unregistered that needs to process
+	 * invalidations for any ranges still allocated at this point in
+	 * time. For now this relies on KVM currently being the only notifier.
+	 */
+	pr_debug("%s: unregistering notifier, invalidating page offsets 0x0-0x%llx\n",
+		 __func__, inode->i_size >> PAGE_SHIFT);
+	inaccessible_invalidate_range(data, 0, inode->i_size >> PAGE_SHIFT);
 
 	mutex_lock(&data->lock);
 	list_del(&notifier->list);
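The folio walk in inaccessible_invalidate_range() is what makes the notifications folio-granular: each iteration covers at most the remainder of the folio backing the current offset. A standalone sketch of the npages arithmetic, assuming a 2M THP (order 9, 512 pages) and an invalidation that starts mid-folio:

#include <stdio.h>

#define ALIGN_DOWN(x, a)	((x) & ~((unsigned long)(a) - 1))

int main(void)
{
	unsigned long offset = 517, end = 2048, order = 9;
	unsigned long folio_pages = 1UL << order;	/* 512 */
	/* pages from offset to the end of its folio: 512 - 5 = 507 */
	unsigned long npages = folio_pages - (offset - ALIGN_DOWN(offset, folio_pages));

	if (npages > end - offset)	/* min_t(pgoff_t, end - offset, npages) */
		npages = end - offset;

	printf("notify [%lu, %lu)\n", offset, offset + npages);	/* [517, 1024) */
	return 0;
}

The next iteration then starts at the folio boundary (offset 1024) and proceeds a full folio at a time.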
4 changes: 3 additions & 1 deletion virt/kvm/kvm_main.c
@@ -1037,7 +1037,8 @@ static int kvm_vm_ioctl_set_mem_attr(struct kvm *kvm, gpa_t gpa, gpa_t size,
 }
 
 static void kvm_private_notifier_invalidate(struct inaccessible_notifier *notifier,
-					    pgoff_t start, pgoff_t end)
+					    pgoff_t start, pgoff_t end, struct page *page,
+					    int order)
 {
 	struct kvm_memory_slot *slot = container_of(notifier,
 						    struct kvm_memory_slot,
@@ -1057,6 +1058,7 @@ static void kvm_private_notifier_invalidate(struct inaccessible_notifier *notifier,
 		return;
 
 	kvm_zap_gfn_range(slot->kvm, start_gfn, end_gfn);
+	kvm_arch_invalidate_private_range(slot, start_gfn, end_gfn, page, order);
 }
 
 static struct inaccessible_notifier_ops kvm_private_notifier_ops = {
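For context on how these ops come into play: KVM fills an inaccessible_notifier with kvm_private_notifier_ops and registers it against the restricted memfd backing the memslot, so every hole punch, release, or unregistration on that file flows into the invalidate callback above. A minimal sketch, assuming the series' inaccessible_register_notifier() entry point and hypothetical memslot field names:

/* Sketch only: the field names and registration point are illustrative. */
static void kvm_private_mem_register(struct kvm_memory_slot *slot)
{
	slot->notifier.ops = &kvm_private_notifier_ops;
	inaccessible_register_notifier(slot->private_file, &slot->notifier);
}

With this commit, inaccessible_unregister_notifier() also replays an invalidation over every still-allocated range first, so the pages are fixed up by the platform before the FD release path frees them back to the host.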
