mm/gup: avoid an unnecessary allocation call for FOLL_LONGTERM cases
commit 53ba78d ("mm/gup: introduce
check_and_migrate_movable_folios()") created a new constraint on the
pin_user_pages*() API family: a potentially large internal allocation must
now occur, for FOLL_LONGTERM cases.

A user-visible consequence has now appeared: user space can no longer pin
more than 2GB of memory on x86_64.  That's because, on a 4KB PAGE_SIZE
system, when user space tries to (indirectly, via a device driver that
calls pin_user_pages()) pin 2GB, this requires allocating a folio pointer
array of MAX_PAGE_ORDER size, which is the limit for kmalloc().
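
To make that limit concrete, here is the rough arithmetic (assuming the
usual x86_64 defaults of 4KB pages, 8-byte pointers, and MAX_PAGE_ORDER
of 10; other configurations shift the threshold accordingly):

    2GB / 4KB per page           = 524288 pages to pin
    524288 pages * 8 bytes each  = 4MB of folio pointers
    kmalloc() upper bound        = 2^10 pages * 4KB = 4MB

so pinning anything beyond 2GB requires an internal allocation larger
than kmalloc() can provide.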

In addition to the directly visible effect described above, there is also
the problem of adding an unnecessary allocation.  The **pages array
argument has already been allocated, and there is no need for a redundant
**folios array allocation in this case.

Fix this by avoiding the new allocation entirely.  This is done by
referring to either the original page[i] within **pages, or to the
associated folio.  Thanks to David Hildenbrand for suggesting this
approach and for providing the initial implementation (which I've tested
and adjusted slightly) as well.
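
For context, here is a minimal, hypothetical sketch (not taken from this
patch, and simplified relative to any real driver) of the pin_user_pages*()
calling pattern that hits this path.  After this change, the caller-supplied
pages array below is the only array-sized allocation involved in a
FOLL_LONGTERM pin:

	struct page **pages;
	int pinned;

	pages = kvmalloc_array(nr_pages, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	/* FOLL_LONGTERM is what routes the pin through the migration check. */
	pinned = pin_user_pages_fast(start, nr_pages,
				     FOLL_WRITE | FOLL_LONGTERM, pages);
	if (pinned < 0) {
		kvfree(pages);
		return pinned;
	}

	/* ... access the pinned memory ... */

	unpin_user_pages(pages, pinned);
	kvfree(pages);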

[[email protected]: whitespace tweak, per David]
  Link: https://lkml.kernel.org/r/[email protected]
[[email protected]: bypass pofs_get_folio(), per Oscar]
  Link: https://lkml.kernel.org/r/[email protected]
Link: https://lkml.kernel.org/r/[email protected]
Fixes: 53ba78d ("mm/gup: introduce check_and_migrate_movable_folios()")
Signed-off-by: John Hubbard <[email protected]>
Suggested-by: David Hildenbrand <[email protected]>
Acked-by: David Hildenbrand <[email protected]>
Reviewed-by: Oscar Salvador <[email protected]>
Cc: Vivek Kasireddy <[email protected]>
Cc: Dave Airlie <[email protected]>
Cc: Gerd Hoffmann <[email protected]>
Cc: Matthew Wilcox <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Jason Gunthorpe <[email protected]>
Cc: Peter Xu <[email protected]>
Cc: Arnd Bergmann <[email protected]>
Cc: Daniel Vetter <[email protected]>
Cc: Dongwon Kim <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Junxiao Chang <[email protected]>
Cc: <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
johnhubbard authored and akpm00 committed Nov 12, 2024
1 parent 247d720, commit 94efde1

1 changed file: mm/gup.c (77 additions, 39 deletions)
diff --git a/mm/gup.c b/mm/gup.c
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -2273,20 +2273,57 @@ struct page *get_dump_page(unsigned long addr)
 #endif /* CONFIG_ELF_CORE */
 
 #ifdef CONFIG_MIGRATION
+
+/*
+ * An array of either pages or folios ("pofs"). Although it may seem tempting to
+ * avoid this complication, by simply interpreting a list of folios as a list of
+ * pages, that approach won't work in the longer term, because eventually the
+ * layouts of struct page and struct folio will become completely different.
+ * Furthermore, this pof approach avoids excessive page_folio() calls.
+ */
+struct pages_or_folios {
+	union {
+		struct page **pages;
+		struct folio **folios;
+		void **entries;
+	};
+	bool has_folios;
+	long nr_entries;
+};
+
+static struct folio *pofs_get_folio(struct pages_or_folios *pofs, long i)
+{
+	if (pofs->has_folios)
+		return pofs->folios[i];
+	return page_folio(pofs->pages[i]);
+}
+
+static void pofs_clear_entry(struct pages_or_folios *pofs, long i)
+{
+	pofs->entries[i] = NULL;
+}
+
+static void pofs_unpin(struct pages_or_folios *pofs)
+{
+	if (pofs->has_folios)
+		unpin_folios(pofs->folios, pofs->nr_entries);
+	else
+		unpin_user_pages(pofs->pages, pofs->nr_entries);
+}
+
 /*
  * Returns the number of collected folios. Return value is always >= 0.
  */
 static unsigned long collect_longterm_unpinnable_folios(
-		struct list_head *movable_folio_list,
-		unsigned long nr_folios,
-		struct folio **folios)
+		struct list_head *movable_folio_list,
+		struct pages_or_folios *pofs)
 {
 	unsigned long i, collected = 0;
 	struct folio *prev_folio = NULL;
 	bool drain_allow = true;
 
-	for (i = 0; i < nr_folios; i++) {
-		struct folio *folio = folios[i];
+	for (i = 0; i < pofs->nr_entries; i++) {
+		struct folio *folio = pofs_get_folio(pofs, i);
 
 		if (folio == prev_folio)
 			continue;
@@ -2327,24 +2364,23 @@ static unsigned long collect_longterm_unpinnable_folios(
  * Returns -EAGAIN if all folios were successfully migrated or -errno for
  * failure (or partial success).
  */
-static int migrate_longterm_unpinnable_folios(
-		struct list_head *movable_folio_list,
-		unsigned long nr_folios,
-		struct folio **folios)
+static int
+migrate_longterm_unpinnable_folios(struct list_head *movable_folio_list,
+				   struct pages_or_folios *pofs)
 {
 	int ret;
 	unsigned long i;
 
-	for (i = 0; i < nr_folios; i++) {
-		struct folio *folio = folios[i];
+	for (i = 0; i < pofs->nr_entries; i++) {
+		struct folio *folio = pofs_get_folio(pofs, i);
 
 		if (folio_is_device_coherent(folio)) {
 			/*
 			 * Migration will fail if the folio is pinned, so
 			 * convert the pin on the source folio to a normal
 			 * reference.
 			 */
-			folios[i] = NULL;
+			pofs_clear_entry(pofs, i);
 			folio_get(folio);
 			gup_put_folio(folio, 1, FOLL_PIN);
 
@@ -2363,8 +2399,8 @@ static int migrate_longterm_unpinnable_folios(
 		 * calling folio_isolate_lru() which takes a reference so the
 		 * folio won't be freed if it's migrating.
 		 */
-		unpin_folio(folios[i]);
-		folios[i] = NULL;
+		unpin_folio(folio);
+		pofs_clear_entry(pofs, i);
 	}
 
 	if (!list_empty(movable_folio_list)) {
@@ -2387,12 +2423,26 @@ static int migrate_longterm_unpinnable_folios(
 	return -EAGAIN;
 
 err:
-	unpin_folios(folios, nr_folios);
+	pofs_unpin(pofs);
 	putback_movable_pages(movable_folio_list);
 
 	return ret;
 }
 
+static long
+check_and_migrate_movable_pages_or_folios(struct pages_or_folios *pofs)
+{
+	LIST_HEAD(movable_folio_list);
+	unsigned long collected;
+
+	collected = collect_longterm_unpinnable_folios(&movable_folio_list,
+						       pofs);
+	if (!collected)
+		return 0;
+
+	return migrate_longterm_unpinnable_folios(&movable_folio_list, pofs);
+}
+
 /*
  * Check whether all folios are *allowed* to be pinned indefinitely (long term).
  * Rather confusingly, all folios in the range are required to be pinned via
@@ -2417,16 +2467,13 @@ static int migrate_longterm_unpinnable_folios(
 static long check_and_migrate_movable_folios(unsigned long nr_folios,
 					     struct folio **folios)
 {
-	unsigned long collected;
-	LIST_HEAD(movable_folio_list);
+	struct pages_or_folios pofs = {
+		.folios = folios,
+		.has_folios = true,
+		.nr_entries = nr_folios,
+	};
 
-	collected = collect_longterm_unpinnable_folios(&movable_folio_list,
-						       nr_folios, folios);
-	if (!collected)
-		return 0;
-
-	return migrate_longterm_unpinnable_folios(&movable_folio_list,
-						  nr_folios, folios);
+	return check_and_migrate_movable_pages_or_folios(&pofs);
 }
 
 /*
@@ -2436,22 +2483,13 @@ static long check_and_migrate_movable_folios(unsigned long nr_folios,
 static long check_and_migrate_movable_pages(unsigned long nr_pages,
 					    struct page **pages)
 {
-	struct folio **folios;
-	long i, ret;
+	struct pages_or_folios pofs = {
+		.pages = pages,
+		.has_folios = false,
+		.nr_entries = nr_pages,
+	};
 
-	folios = kmalloc_array(nr_pages, sizeof(*folios), GFP_KERNEL);
-	if (!folios) {
-		unpin_user_pages(pages, nr_pages);
-		return -ENOMEM;
-	}
-
-	for (i = 0; i < nr_pages; i++)
-		folios[i] = page_folio(pages[i]);
-
-	ret = check_and_migrate_movable_folios(nr_pages, folios);
-
-	kfree(folios);
-	return ret;
+	return check_and_migrate_movable_pages_or_folios(&pofs);
 }
 #else
 static long check_and_migrate_movable_pages(unsigned long nr_pages,
