Discussion:
[Xen-devel] [PATCH v2 1/2] x86/dom0: rename paging function
Roger Pau Monne
2018-12-05 14:54:59 UTC
Permalink
To note it's calculating the approximate amount of memory required by
shadow paging.

No functional change.

Signed-off-by: Roger Pau Monné <***@citrix.com>
---
Cc: Jan Beulich <***@suse.com>
Cc: Andrew Cooper <***@citrix.com>
Cc: Wei Liu <***@citrix.com>
Cc: ***@bertin.fr
---
xen/arch/x86/dom0_build.c | 4 ++--
xen/arch/x86/hvm/dom0_build.c | 2 +-
xen/include/asm-x86/dom0_build.h | 2 +-
3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/xen/arch/x86/dom0_build.c b/xen/arch/x86/dom0_build.c
index 5e2ad4bd56..ba9aa85611 100644
--- a/xen/arch/x86/dom0_build.c
+++ b/xen/arch/x86/dom0_build.c
@@ -256,7 +256,7 @@ boolean_param("ro-hpet", ro_hpet);

unsigned int __initdata dom0_memflags = MEMF_no_dma|MEMF_exact_node;

-unsigned long __init dom0_paging_pages(const struct domain *d,
+unsigned long __init dom0_shadow_pages(const struct domain *d,
unsigned long nr_pages)
{
/* Copied from: libxl_get_required_shadow_memory() */
@@ -325,7 +325,7 @@ unsigned long __init dom0_compute_nr_pages(
break;

/* Reserve memory for shadow or HAP. */
- avail -= dom0_paging_pages(d, nr_pages);
+ avail -= dom0_shadow_pages(d, nr_pages);
}

if ( is_pv_domain(d) &&
diff --git a/xen/arch/x86/hvm/dom0_build.c b/xen/arch/x86/hvm/dom0_build.c
index 12c20a4b66..2af2bd8c3d 100644
--- a/xen/arch/x86/hvm/dom0_build.c
+++ b/xen/arch/x86/hvm/dom0_build.c
@@ -366,7 +366,7 @@ static int __init pvh_setup_p2m(struct domain *d)
pvh_setup_e820(d, nr_pages);
do {
preempted = false;
- paging_set_allocation(d, dom0_paging_pages(d, nr_pages),
+ paging_set_allocation(d, dom0_shadow_pages(d, nr_pages),
&preempted);
process_pending_softirqs();
} while ( preempted );
diff --git a/xen/include/asm-x86/dom0_build.h b/xen/include/asm-x86/dom0_build.h
index 33a5483739..22f960b8b0 100644
--- a/xen/include/asm-x86/dom0_build.h
+++ b/xen/include/asm-x86/dom0_build.h
@@ -25,7 +25,7 @@ int dom0_construct_pvh(struct domain *d, const module_t *image,
module_t *initrd,
char *cmdline);

-unsigned long dom0_paging_pages(const struct domain *d,
+unsigned long dom0_shadow_pages(const struct domain *d,
unsigned long nr_pages);

void dom0_update_physmap(struct domain *d, unsigned long pfn,
--
2.19.1
Roger Pau Monne
2018-12-05 14:55:00 UTC
Permalink
Current approximation of paging memory usage is based on the required
amount when running in shadow mode and doesn't take into account the
memory required by the IOMMU page tables.

Fix this by introducing a function to calculate the amount of memory
required by HAP/IOMMU page tables. The formula used to calculate such
approximation is based on the pessimistic approach that each 4KB
memory chunk will use 8 bytes of page table memory. Note that this
approximation might need further tuning based on testing on different
systems.

Also fix the calculation of the required paging related memory in
dom0_compute_nr_pages to take into account the paging implementation
(shadow or HAP) and whether the IOMMU pages tables are shared with the
HAP page tables.

Signed-off-by: Roger Pau Monné <***@citrix.com>
---
Cc: Jan Beulich <***@suse.com>
Cc: Andrew Cooper <***@citrix.com>
Cc: Wei Liu <***@citrix.com>
Cc: ***@bertin.fr
---
xen/arch/x86/dom0_build.c | 31 +++++++++++++++++++++++++++----
xen/arch/x86/hvm/dom0_build.c | 6 ++++--
xen/include/asm-x86/dom0_build.h | 2 ++
3 files changed, 33 insertions(+), 6 deletions(-)

diff --git a/xen/arch/x86/dom0_build.c b/xen/arch/x86/dom0_build.c
index ba9aa85611..3a8e138f23 100644
--- a/xen/arch/x86/dom0_build.c
+++ b/xen/arch/x86/dom0_build.c
@@ -267,6 +267,25 @@ unsigned long __init dom0_shadow_pages(const struct domain *d,
return ((memkb + 1023) / 1024) << (20 - PAGE_SHIFT);
}

+unsigned long __init dom0_hap_pages(const struct domain *d,
+ unsigned long nr_pages)
+{
+ /*
+ * Attempt to account for at least some of the MMIO regions by adding the
+ * size of the holes in the memory map to the amount of pages to map. Note
+ * this will obviously not account for MMIO regions that are past the last
+ * RAM range in the memory map.
+ */
+ nr_pages += max_page - total_pages;
+ /*
+ * Approximate the memory required for the HAP/IOMMU page tables by
+ * pessimistically assuming each page will consume a 8 byte page table
+ * entry.
+ */
+ return DIV_ROUND_UP(nr_pages * 8, PAGE_SIZE << PAGE_ORDER_4K);
+}
+
+
unsigned long __init dom0_compute_nr_pages(
struct domain *d, struct elf_dom_parms *parms, unsigned long initrd_len)
{
@@ -294,8 +313,7 @@ unsigned long __init dom0_compute_nr_pages(
avail -= max_pdx >> s;
}

- need_paging = is_hvm_domain(d) &&
- (!iommu_hap_pt_share || !paging_mode_hap(d));
+ need_paging = is_hvm_domain(d);
for ( ; ; need_paging = false )
{
nr_pages = dom0_nrpages;
@@ -324,8 +342,13 @@ unsigned long __init dom0_compute_nr_pages(
if ( !need_paging )
break;

- /* Reserve memory for shadow or HAP. */
- avail -= dom0_shadow_pages(d, nr_pages);
+ /* Reserve memory for CPU and IOMMU page tables. */
+ if ( paging_mode_hap(d) )
+ avail -= dom0_hap_pages(d, nr_pages) *
+ (iommu_hap_pt_share ? 1 : 2);
+ else
+ avail -= dom0_shadow_pages(d, nr_pages) +
+ dom0_hap_pages(d, nr_pages);
}

if ( is_pv_domain(d) &&
diff --git a/xen/arch/x86/hvm/dom0_build.c b/xen/arch/x86/hvm/dom0_build.c
index 2af2bd8c3d..051e999957 100644
--- a/xen/arch/x86/hvm/dom0_build.c
+++ b/xen/arch/x86/hvm/dom0_build.c
@@ -358,6 +358,9 @@ static int __init pvh_setup_p2m(struct domain *d)
{
struct vcpu *v = d->vcpu[0];
unsigned long nr_pages = dom0_compute_nr_pages(d, NULL, 0);
+ unsigned long paging_pages =
+ paging_mode_hap(d) ? dom0_hap_pages(d, nr_pages)
+ : dom0_shadow_pages(d, nr_pages);
unsigned int i;
int rc;
bool preempted;
@@ -366,8 +369,7 @@ static int __init pvh_setup_p2m(struct domain *d)
pvh_setup_e820(d, nr_pages);
do {
preempted = false;
- paging_set_allocation(d, dom0_shadow_pages(d, nr_pages),
- &preempted);
+ paging_set_allocation(d, paging_pages, &preempted);
process_pending_softirqs();
} while ( preempted );

diff --git a/xen/include/asm-x86/dom0_build.h b/xen/include/asm-x86/dom0_build.h
index 22f960b8b0..e1309c25e8 100644
--- a/xen/include/asm-x86/dom0_build.h
+++ b/xen/include/asm-x86/dom0_build.h
@@ -27,6 +27,8 @@ int dom0_construct_pvh(struct domain *d, const module_t *image,

unsigned long dom0_shadow_pages(const struct domain *d,
unsigned long nr_pages);
+unsigned long dom0_hap_pages(const struct domain *d,
+ unsigned long nr_pages);

void dom0_update_physmap(struct domain *d, unsigned long pfn,
unsigned long mfn, unsigned long vphysmap_s);
--
2.19.1
Wei Liu
2018-12-06 12:42:15 UTC
Permalink
Post by Roger Pau Monne
Current approximation of paging memory usage is based on the required
amount when running in shadow mode and doesn't take into account the
memory required by the IOMMU page tables.
Fix this by introducing a function to calculate the amount of memory
required by HAP/IOMMU page tables. The formula used to calculate such
approximation is based on the pessimistic approach that each 4KB
memory chunk will use 8 bytes of page table memory. Note that this
approximation might need further tuning based on testing on different
systems.
Also fix the calculation of the required paging related memory in
dom0_compute_nr_pages to take into account the paging implementation
(shadow or HAP) and whether the IOMMU pages tables are shared with the
HAP page tables.
---
---
xen/arch/x86/dom0_build.c | 31 +++++++++++++++++++++++++++----
xen/arch/x86/hvm/dom0_build.c | 6 ++++--
xen/include/asm-x86/dom0_build.h | 2 ++
3 files changed, 33 insertions(+), 6 deletions(-)
diff --git a/xen/arch/x86/dom0_build.c b/xen/arch/x86/dom0_build.c
index ba9aa85611..3a8e138f23 100644
--- a/xen/arch/x86/dom0_build.c
+++ b/xen/arch/x86/dom0_build.c
@@ -267,6 +267,25 @@ unsigned long __init dom0_shadow_pages(const struct domain *d,
return ((memkb + 1023) / 1024) << (20 - PAGE_SHIFT);
}
+unsigned long __init dom0_hap_pages(const struct domain *d,
+ unsigned long nr_pages)
+{
+ /*
+ * Attempt to account for at least some of the MMIO regions by adding the
+ * size of the holes in the memory map to the amount of pages to map. Note
+ * this will obviously not account for MMIO regions that are past the last
+ * RAM range in the memory map.
+ */
+ nr_pages += max_page - total_pages;
Do those regions past end of RAM range show up in E820 map? If so, why
not just sum up all reserved regions?

Wei.
Roger Pau Monné
2018-12-10 10:33:28 UTC
Permalink
Post by Wei Liu
Post by Roger Pau Monne
Current approximation of paging memory usage is based on the required
amount when running in shadow mode and doesn't take into account the
memory required by the IOMMU page tables.
Fix this by introducing a function to calculate the amount of memory
required by HAP/IOMMU page tables. The formula used to calculate such
approximation is based on the pessimistic approach that each 4KB
memory chunk will use 8 bytes of page table memory. Note that this
approximation might need further tuning based on testing on different
systems.
Also fix the calculation of the required paging related memory in
dom0_compute_nr_pages to take into account the paging implementation
(shadow or HAP) and whether the IOMMU pages tables are shared with the
HAP page tables.
---
---
xen/arch/x86/dom0_build.c | 31 +++++++++++++++++++++++++++----
xen/arch/x86/hvm/dom0_build.c | 6 ++++--
xen/include/asm-x86/dom0_build.h | 2 ++
3 files changed, 33 insertions(+), 6 deletions(-)
diff --git a/xen/arch/x86/dom0_build.c b/xen/arch/x86/dom0_build.c
index ba9aa85611..3a8e138f23 100644
--- a/xen/arch/x86/dom0_build.c
+++ b/xen/arch/x86/dom0_build.c
@@ -267,6 +267,25 @@ unsigned long __init dom0_shadow_pages(const struct domain *d,
return ((memkb + 1023) / 1024) << (20 - PAGE_SHIFT);
}
+unsigned long __init dom0_hap_pages(const struct domain *d,
+ unsigned long nr_pages)
+{
+ /*
+ * Attempt to account for at least some of the MMIO regions by adding the
+ * size of the holes in the memory map to the amount of pages to map. Note
+ * this will obviously not account for MMIO regions that are past the last
+ * RAM range in the memory map.
+ */
+ nr_pages += max_page - total_pages;
Do those regions past end of RAM range show up in E820 map?
No, BARs for example don't need to be in reserved regions. I've got
one box with a 16GB Tesla card that has the 16GB BAR placed way past
the last entry in the memory map, without any reserved region.

So while this approach is not perfect, it's better than what we
currently do, and we can always improve from there if it's clear what
limitations we currently have.

Thanks, Roger.

Wei Liu
2018-12-06 12:31:49 UTC
Permalink
Post by Roger Pau Monne
To note it's calculating the approximate amount of memory required by
shadow paging.
No functional change.
Reviewed-by: Wei Liu <***@citrix.com>
Loading...