Discussion: [PATCH 3/6] kvm: arm64: Convert kvm_set_s2pte_readonly() from inline asm to cmpxchg()
Catalin Marinas
2017-07-25 13:53:05 UTC
To take advantage of the LSE atomic instructions and also make the code
cleaner, convert the kvm_set_s2pte_readonly() function to use the more
generic cmpxchg().
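
For illustration, the read-modify-write retry loop that replaces the
LDXR/STXR sequence can be sketched as self-contained C. The __atomic
builtins below stand in for the kernel's cmpxchg_relaxed(), and the
PTE_S2_* values and helper names are illustrative only; with ARMv8.1 LSE
the real cmpxchg_relaxed() can compile down to a single CAS instruction
instead of an exclusive-load/store loop.

#include <stdint.h>
#include <stdio.h>

/* Illustrative stage-2 access permission bits, not taken from pgtable-hwdef.h. */
#define PTE_S2_RDONLY   (1UL << 6)
#define PTE_S2_RDWR     (3UL << 6)

typedef uint64_t pteval_t;

/* Stand-in for cmpxchg_relaxed(): returns the value previously found at *p. */
static pteval_t cmpxchg_relaxed_sim(pteval_t *p, pteval_t old, pteval_t new)
{
        __atomic_compare_exchange_n(p, &old, new, 0,
                                    __ATOMIC_RELAXED, __ATOMIC_RELAXED);
        return old;
}

static void set_s2pte_readonly_sim(pteval_t *pte)
{
        pteval_t old_pteval, pteval;

        do {
                /* Snapshot the entry, modify a local copy, try to publish it. */
                pteval = old_pteval = __atomic_load_n(pte, __ATOMIC_RELAXED);
                pteval &= ~PTE_S2_RDWR;         /* clear write permission */
                pteval |= PTE_S2_RDONLY;        /* mark read-only */
                /* Retry if another agent modified the entry in the meantime. */
        } while (cmpxchg_relaxed_sim(pte, old_pteval, pteval) != old_pteval);
}

int main(void)
{
        pteval_t pte = PTE_S2_RDWR | 0x1000;

        set_s2pte_readonly_sim(&pte);
        printf("pte = %#llx\n", (unsigned long long)pte);
        return 0;
}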

Cc: Marc Zyngier <***@arm.com>
Cc: Christoffer Dall <***@linaro.org>
Cc: Will Deacon <***@arm.com>
Acked-by: Mark Rutland <***@arm.com>
Signed-off-by: Catalin Marinas <***@arm.com>
---
arch/arm64/include/asm/kvm_mmu.h | 20 ++++++++------------
1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index a89cc22abadc..40b3ea690826 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -175,18 +175,14 @@ static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd)

static inline void kvm_set_s2pte_readonly(pte_t *pte)
{
- pteval_t pteval;
- unsigned long tmp;
-
- asm volatile("// kvm_set_s2pte_readonly\n"
- " prfm pstl1strm, %2\n"
- "1: ldxr %0, %2\n"
- " and %0, %0, %3 // clear PTE_S2_RDWR\n"
- " orr %0, %0, %4 // set PTE_S2_RDONLY\n"
- " stxr %w1, %0, %2\n"
- " cbnz %w1, 1b\n"
- : "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*pte))
- : "L" (~PTE_S2_RDWR), "L" (PTE_S2_RDONLY));
+ pteval_t old_pteval, pteval;
+
+ do {
+ pteval = old_pteval = READ_ONCE(pte_val(*pte));
+ pteval &= ~PTE_S2_RDWR;
+ pteval |= PTE_S2_RDONLY;
+ } while (cmpxchg_relaxed(&pte_val(*pte), old_pteval, pteval) !=
+ old_pteval);
}

static inline bool kvm_s2pte_readonly(pte_t *pte)
Catalin Marinas
2017-07-25 13:53:06 UTC
Currently PTE_RDONLY is treated as a hardware-only bit and not handled
by the pte_mkwrite(), pte_wrprotect() or the user PAGE_* definitions.
The set_pte_at() function is responsible for setting this bit based on
the write permission or dirty state. This patch moves the PTE_RDONLY
handling out of set_pte_at() into the pte_mkwrite()/pte_wrprotect()
functions. The PAGE_* definitions need to be updated to explicitly
include PTE_RDONLY when !PTE_WRITE.
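
As a stand-alone illustration of the invariant this establishes, the sketch
below mimics the new helpers with made-up bit positions (the real ones live
in pgtable-hwdef.h): once pte_mkwrite()/pte_wrprotect() keep PTE_WRITE and
PTE_RDONLY consistent on their own, set_pte_at() no longer needs to patch
PTE_RDONLY up afterwards.

#include <stdint.h>
#include <assert.h>

/* Illustrative bit positions only. */
#define PTE_RDONLY      (1UL << 7)      /* hardware AP[2] */
#define PTE_WRITE       (1UL << 51)     /* software bit, doubles as DBM */

typedef uint64_t pteval_t;

static pteval_t pte_wrprotect(pteval_t pte)
{
        pte &= ~PTE_WRITE;
        pte |= PTE_RDONLY;
        return pte;
}

static pteval_t pte_mkwrite(pteval_t pte)
{
        pte |= PTE_WRITE;
        pte &= ~PTE_RDONLY;
        return pte;
}

int main(void)
{
        pteval_t pte = PTE_RDONLY;      /* e.g. a PAGE_READONLY mapping */

        pte = pte_mkwrite(pte);
        assert((pte & PTE_WRITE) && !(pte & PTE_RDONLY));       /* writable */

        pte = pte_wrprotect(pte);
        assert(!(pte & PTE_WRITE) && (pte & PTE_RDONLY));       /* read-only */
        return 0;
}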

Cc: Will Deacon <***@arm.com>
Signed-off-by: Catalin Marinas <***@arm.com>
---
arch/arm64/include/asm/pgtable-prot.h | 12 ++++++------
arch/arm64/include/asm/pgtable.h | 34 ++++++++++------------------------
arch/arm64/kernel/hibernate.c | 4 ++--
arch/arm64/mm/fault.c | 6 +-----
4 files changed, 19 insertions(+), 37 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 2142c7726e76..9b7af598b375 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -63,14 +63,14 @@
#define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN)

-#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_PXN | PTE_UXN)
+#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN)
#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
-#define PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
-#define PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
-#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
-#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
-#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_NG | PTE_PXN)
+#define PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
+#define PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN)
+#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
+#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN)
+#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN)

#define __P000 PAGE_NONE
#define __P001 PAGE_READONLY
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 4580d5992d0c..a14e2120811c 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -125,12 +125,16 @@ static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot)

static inline pte_t pte_wrprotect(pte_t pte)
{
- return clear_pte_bit(pte, __pgprot(PTE_WRITE));
+ pte = clear_pte_bit(pte, __pgprot(PTE_WRITE));
+ pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
+ return pte;
}

static inline pte_t pte_mkwrite(pte_t pte)
{
- return set_pte_bit(pte, __pgprot(PTE_WRITE));
+ pte = set_pte_bit(pte, __pgprot(PTE_WRITE));
+ pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
+ return pte;
}

static inline pte_t pte_mkclean(pte_t pte)
@@ -169,16 +173,6 @@ static inline pte_t pte_mknoncont(pte_t pte)
return clear_pte_bit(pte, __pgprot(PTE_CONT));
}

-static inline pte_t pte_clear_rdonly(pte_t pte)
-{
- return clear_pte_bit(pte, __pgprot(PTE_RDONLY));
-}
-
-static inline pte_t pte_set_rdonly(pte_t pte)
-{
- return set_pte_bit(pte, __pgprot(PTE_RDONLY));
-}
-
static inline pte_t pte_mkpresent(pte_t pte)
{
return set_pte_bit(pte, __pgprot(PTE_VALID));
@@ -226,14 +220,8 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
- if (pte_present(pte)) {
- if (pte_sw_dirty(pte) && pte_write(pte))
- pte_val(pte) &= ~PTE_RDONLY;
- else
- pte_val(pte) |= PTE_RDONLY;
- if (pte_user_exec(pte) && !pte_special(pte))
- __sync_icache_dcache(pte, addr);
- }
+ if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
+ __sync_icache_dcache(pte, addr);

/*
* If the existing pte is valid, check for potential race with
@@ -656,12 +644,10 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
pte = old_pte = READ_ONCE(*ptep);
/*
* If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY
- * clear), set the PTE_DIRTY and PTE_RDONLY bits.
+ * clear), set the PTE_DIRTY bit.
*/
- if (pte_hw_dirty(pte)) {
+ if (pte_hw_dirty(pte))
pte = pte_mkdirty(pte);
- pte = pte_set_rdonly(pte);
- }
pte = pte_wrprotect(pte);
} while (cmpxchg_relaxed(&pte_val(*ptep), pte_val(old_pte),
pte_val(pte)) != pte_val(old_pte));
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index a44e13942d30..095d3c170f5d 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -330,7 +330,7 @@ static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
* read only (code, rodata). Clear the RDONLY bit from
* the temporary mappings we use during restore.
*/
- set_pte(dst_pte, pte_clear_rdonly(pte));
+ set_pte(dst_pte, pte_mkwrite(pte));
} else if (debug_pagealloc_enabled() && !pte_none(pte)) {
/*
* debug_pagealloc will removed the PTE_VALID bit if
@@ -343,7 +343,7 @@ static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
*/
BUG_ON(!pfn_valid(pte_pfn(pte)));

- set_pte(dst_pte, pte_mkpresent(pte_clear_rdonly(pte)));
+ set_pte(dst_pte, pte_mkpresent(pte_mkwrite(pte)));
}
}

diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 65ed66bb2828..cfff98a97a7c 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -161,11 +161,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
return 0;

/* only preserve the access flags and write permission */
- pte_val(entry) &= PTE_AF | PTE_WRITE | PTE_DIRTY;
-
- /* set PTE_RDONLY if actual read-only or clean PTE */
- if (!pte_write(entry) || !pte_sw_dirty(entry))
- entry = pte_set_rdonly(entry);
+ pte_val(entry) &= PTE_RDONLY | PTE_AF | PTE_WRITE | PTE_DIRTY;

/*
* Setting the flags must be done atomically to avoid racing with the
Catalin Marinas
2017-07-25 13:53:03 UTC
In a system with DBM (dirty bit management) capable agents there is a
possible race between a CPU executing ptep_set_access_flags() (maybe
non-DBM capable) and a hardware update of the dirty state (clearing of
PTE_RDONLY). The scenario:

a) the pte is writable (PTE_WRITE set), clean (PTE_RDONLY set) and old
(PTE_AF clear)
b) ptep_set_access_flags() is called as a result of a read access and it
needs to set the pte to writable, clean and young (PTE_AF set)
c) a DBM-capable agent, as a result of a different write access, is
marking the entry as young (setting PTE_AF) and dirty (clearing
PTE_RDONLY)

The current ptep_set_access_flags() implementation would set the
PTE_RDONLY bit in the resulting value, overriding the DBM update and
losing the dirty state.

This patch fixes such a race by setting PTE_RDONLY to the most permissive
(lowest value) of the current entry and the new one.
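
A worked example of the bit manipulation may help; the stand-alone sketch
below mirrors the asm in the hunk that follows, with an illustrative
PTE_RDONLY position and a hypothetical merge_flags() helper name. Toggling
PTE_RDONLY before and after the OR turns the OR into an AND for that one
bit, so the resulting PTE_RDONLY is the most permissive of the two values.

#include <stdint.h>
#include <assert.h>

#define PTE_RDONLY      (1UL << 7)      /* illustrative position of AP[2] */

typedef uint64_t pteval_t;

static pteval_t merge_flags(pteval_t old, pteval_t entry)
{
        pteval_t val;

        entry ^= PTE_RDONLY;    /* pre-negate PTE_RDONLY in the new flags */
        val = old ^ PTE_RDONLY; /* negate PTE_RDONLY in the current pte */
        val |= entry;           /* set the access flags / write permission */
        val ^= PTE_RDONLY;      /* negate back: RDONLY = old AND new */
        return val;
}

int main(void)
{
        /* Hardware cleared PTE_RDONLY (entry is now dirty) under our feet... */
        pteval_t current_pte = 0;
        /* ...while the fault handler wants to write back a clean, young pte. */
        pteval_t entry = PTE_RDONLY;

        /* The merge must not re-set PTE_RDONLY, or the dirty state is lost. */
        assert((merge_flags(current_pte, entry) & PTE_RDONLY) == 0);
        return 0;
}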

Fixes: 66dbd6e61a52 ("arm64: Implement ptep_set_access_flags() for hardware AF/DBM")
Cc: Will Deacon <***@arm.com>
Acked-by: Mark Rutland <***@arm.com>
Acked-by: Steve Capper <***@arm.com>
Signed-off-by: Catalin Marinas <***@arm.com>
---
arch/arm64/mm/fault.c | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index c7861c9864e6..2509e4fe6992 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -163,26 +163,27 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
/* only preserve the access flags and write permission */
pte_val(entry) &= PTE_AF | PTE_WRITE | PTE_DIRTY;

- /*
- * PTE_RDONLY is cleared by default in the asm below, so set it in
- * back if necessary (read-only or clean PTE).
- */
+ /* set PTE_RDONLY if actual read-only or clean PTE */
if (!pte_write(entry) || !pte_sw_dirty(entry))
pte_val(entry) |= PTE_RDONLY;

/*
* Setting the flags must be done atomically to avoid racing with the
- * hardware update of the access/dirty state.
+ * hardware update of the access/dirty state. The PTE_RDONLY bit must
+ * be set to the most permissive (lowest value) of *ptep and entry
+ * (calculated as: a & b == ~(~a | ~b)).
*/
+ pte_val(entry) ^= PTE_RDONLY;
asm volatile("// ptep_set_access_flags\n"
" prfm pstl1strm, %2\n"
"1: ldxr %0, %2\n"
- " and %0, %0, %3 // clear PTE_RDONLY\n"
+ " eor %0, %0, %3 // negate PTE_RDONLY in *ptep\n"
" orr %0, %0, %4 // set flags\n"
+ " eor %0, %0, %3 // negate final PTE_RDONLY\n"
" stxr %w1, %0, %2\n"
" cbnz %w1, 1b\n"
: "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep))
- : "L" (~PTE_RDONLY), "r" (pte_val(entry)));
+ : "L" (PTE_RDONLY), "r" (pte_val(entry)));

flush_tlb_fix_spurious_fault(vma, address);
return 1;
Catalin Marinas
2017-07-25 13:53:04 UTC
With the support for hardware updates of the access and dirty states,
the following pte handling functions had to be implemented using
exclusives: __ptep_test_and_clear_young(), ptep_get_and_clear(),
ptep_set_wrprotect() and ptep_set_access_flags(). To take advantage of
the LSE atomic instructions and also make the code cleaner, convert
these pte functions to use the more generic cmpxchg()/xchg().
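
The cmpxchg() conversions follow the same retry pattern already sketched
for kvm_set_s2pte_readonly() in this thread, while ptep_get_and_clear()
reduces to a single exchange. A minimal stand-alone sketch, with a GCC/Clang
builtin standing in for the kernel's xchg_relaxed() (on ARMv8.1 LSE this can
become a single SWP instruction rather than an LDXR/STXR loop):

#include <stdint.h>
#include <stdio.h>

typedef uint64_t pteval_t;

/* Stand-in for xchg_relaxed(): atomically swap in new, return the old value. */
static pteval_t xchg_relaxed_sim(pteval_t *p, pteval_t new)
{
        return __atomic_exchange_n(p, new, __ATOMIC_RELAXED);
}

/* ptep_get_and_clear() becomes a single atomic exchange with zero. */
static pteval_t ptep_get_and_clear_sim(pteval_t *ptep)
{
        return xchg_relaxed_sim(ptep, 0);
}

int main(void)
{
        pteval_t pte = 0xdead0000 | 0x3;
        pteval_t old = ptep_get_and_clear_sim(&pte);

        printf("old = %#llx, now = %#llx\n",
               (unsigned long long)old, (unsigned long long)pte);
        return 0;
}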

Cc: Will Deacon <***@arm.com>
Acked-by: Mark Rutland <***@arm.com>
Acked-by: Steve Capper <***@arm.com>
Signed-off-by: Catalin Marinas <***@arm.com>
---
arch/arm64/include/asm/pgtable.h | 67 +++++++++++++++++-----------------------
arch/arm64/mm/fault.c | 23 ++++++--------
2 files changed, 39 insertions(+), 51 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 6eae342ced6b..4580d5992d0c 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -39,6 +39,7 @@

#ifndef __ASSEMBLY__

+#include <asm/cmpxchg.h>
#include <asm/fixmap.h>
#include <linux/mmdebug.h>

@@ -173,6 +174,11 @@ static inline pte_t pte_clear_rdonly(pte_t pte)
return clear_pte_bit(pte, __pgprot(PTE_RDONLY));
}

+static inline pte_t pte_set_rdonly(pte_t pte)
+{
+ return set_pte_bit(pte, __pgprot(PTE_RDONLY));
+}
+
static inline pte_t pte_mkpresent(pte_t pte)
{
return set_pte_bit(pte, __pgprot(PTE_VALID));
@@ -593,20 +599,15 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int __ptep_test_and_clear_young(pte_t *ptep)
{
- pteval_t pteval;
- unsigned int tmp, res;
+ pte_t old_pte, pte;

- asm volatile("// __ptep_test_and_clear_young\n"
- " prfm pstl1strm, %2\n"
- "1: ldxr %0, %2\n"
- " ubfx %w3, %w0, %5, #1 // extract PTE_AF (young)\n"
- " and %0, %0, %4 // clear PTE_AF\n"
- " stxr %w1, %0, %2\n"
- " cbnz %w1, 1b\n"
- : "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)), "=&r" (res)
- : "L" (~PTE_AF), "I" (ilog2(PTE_AF)));
+ do {
+ old_pte = READ_ONCE(*ptep);
+ pte = pte_mkold(old_pte);
+ } while (cmpxchg_relaxed(&pte_val(*ptep), pte_val(old_pte),
+ pte_val(pte)) != pte_val(old_pte));

- return res;
+ return pte_young(old_pte);
}

static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
@@ -630,17 +631,7 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
unsigned long address, pte_t *ptep)
{
- pteval_t old_pteval;
- unsigned int tmp;
-
- asm volatile("// ptep_get_and_clear\n"
- " prfm pstl1strm, %2\n"
- "1: ldxr %0, %2\n"
- " stxr %w1, xzr, %2\n"
- " cbnz %w1, 1b\n"
- : "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)));
-
- return __pte(old_pteval);
+ return __pte(xchg_relaxed(&pte_val(*ptep), 0));
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -659,21 +650,21 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
{
- pteval_t pteval;
- unsigned long tmp;
-
- asm volatile("// ptep_set_wrprotect\n"
- " prfm pstl1strm, %2\n"
- "1: ldxr %0, %2\n"
- " tst %0, %4 // check for hw dirty (!PTE_RDONLY)\n"
- " csel %1, %3, xzr, eq // set PTE_DIRTY|PTE_RDONLY if dirty\n"
- " orr %0, %0, %1 // if !dirty, PTE_RDONLY is already set\n"
- " and %0, %0, %5 // clear PTE_WRITE/PTE_DBM\n"
- " stxr %w1, %0, %2\n"
- " cbnz %w1, 1b\n"
- : "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*ptep))
- : "r" (PTE_DIRTY|PTE_RDONLY), "L" (PTE_RDONLY), "L" (~PTE_WRITE)
- : "cc");
+ pte_t old_pte, pte;
+
+ do {
+ pte = old_pte = READ_ONCE(*ptep);
+ /*
+ * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY
+ * clear), set the PTE_DIRTY and PTE_RDONLY bits.
+ */
+ if (pte_hw_dirty(pte)) {
+ pte = pte_mkdirty(pte);
+ pte = pte_set_rdonly(pte);
+ }
+ pte = pte_wrprotect(pte);
+ } while (cmpxchg_relaxed(&pte_val(*ptep), pte_val(old_pte),
+ pte_val(pte)) != pte_val(old_pte));
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 2509e4fe6992..65ed66bb2828 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -34,6 +34,7 @@
#include <linux/hugetlb.h>

#include <asm/bug.h>
+#include <asm/cmpxchg.h>
#include <asm/cpufeature.h>
#include <asm/exception.h>
#include <asm/debug-monitors.h>
@@ -154,8 +155,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep,
pte_t entry, int dirty)
{
- pteval_t old_pteval;
- unsigned int tmp;
+ pteval_t old_pteval, pteval;

if (pte_same(*ptep, entry))
return 0;
@@ -165,7 +165,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma,

/* set PTE_RDONLY if actual read-only or clean PTE */
if (!pte_write(entry) || !pte_sw_dirty(entry))
- pte_val(entry) |= PTE_RDONLY;
+ entry = pte_set_rdonly(entry);

/*
* Setting the flags must be done atomically to avoid racing with the
@@ -174,16 +174,13 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
* (calculated as: a & b == ~(~a | ~b)).
*/
pte_val(entry) ^= PTE_RDONLY;
- asm volatile("// ptep_set_access_flags\n"
- " prfm pstl1strm, %2\n"
- "1: ldxr %0, %2\n"
- " eor %0, %0, %3 // negate PTE_RDONLY in *ptep\n"
- " orr %0, %0, %4 // set flags\n"
- " eor %0, %0, %3 // negate final PTE_RDONLY\n"
- " stxr %w1, %0, %2\n"
- " cbnz %w1, 1b\n"
- : "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep))
- : "L" (PTE_RDONLY), "r" (pte_val(entry)));
+ do {
+ old_pteval = READ_ONCE(pte_val(*ptep));
+ pteval = old_pteval ^ PTE_RDONLY;
+ pteval |= pte_val(entry);
+ pteval ^= PTE_RDONLY;
+ } while (cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval) !=
+ old_pteval);

flush_tlb_fix_spurious_fault(vma, address);
return 1;
Catalin Marinas
2017-07-25 13:53:07 UTC
ptep_set_wrprotect() is only called on CoW mappings which are private
(!VM_SHARED) with the pte either read-only (!PTE_WRITE && PTE_RDONLY) or
writable and software-dirty (PTE_WRITE && !PTE_RDONLY && PTE_DIRTY).
There is no race with the hardware update of the dirty state: clearing
of PTE_RDONLY when PTE_WRITE (a.k.a. PTE_DBM) is set. This patch removes
the code setting the software PTE_DIRTY bit in ptep_set_wrprotect() as
superfluous. A VM_WARN_ONCE is introduced in case the above logic is
wrong or the core mm code changes its use of ptep_set_wrprotect().
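
To make the warning condition concrete, the stand-alone sketch below uses
illustrative bit positions and re-derives the pte_dirty() logic (mirroring
the pte_sw_dirty()/pte_hw_dirty() macros visible elsewhere in this series);
wrprotect_may_race() is a hypothetical name for the state VM_WARN_ONCE
reports, i.e. a writable but clean pte that a DBM-capable agent could dirty
concurrently.

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

/* Illustrative bit positions only. */
#define PTE_RDONLY      (1UL << 7)
#define PTE_WRITE       (1UL << 51)
#define PTE_DIRTY       (1UL << 55)

typedef uint64_t pteval_t;

static bool pte_write(pteval_t pte)    { return pte & PTE_WRITE; }
static bool pte_sw_dirty(pteval_t pte) { return pte & PTE_DIRTY; }
static bool pte_hw_dirty(pteval_t pte) { return pte_write(pte) && !(pte & PTE_RDONLY); }
static bool pte_dirty(pteval_t pte)    { return pte_sw_dirty(pte) || pte_hw_dirty(pte); }

/* The condition the new VM_WARN_ONCE checks for: writable but not dirty. */
static bool wrprotect_may_race(pteval_t pte)
{
        return pte_write(pte) && !pte_dirty(pte);
}

int main(void)
{
        /* The two states expected on a CoW mapping: no warning. */
        printf("%d\n", wrprotect_may_race(PTE_RDONLY));                /* 0 */
        printf("%d\n", wrprotect_may_race(PTE_WRITE | PTE_DIRTY));     /* 0 */
        /* Writable and clean would race with a hardware DBM update. */
        printf("%d\n", wrprotect_may_race(PTE_WRITE | PTE_RDONLY));    /* 1 */
        return 0;
}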

Cc: Will Deacon <***@arm.com>
Acked-by: Steve Capper <***@arm.com>
Signed-off-by: Catalin Marinas <***@arm.com>
---
arch/arm64/include/asm/pgtable.h | 25 +++++++++++++++----------
1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index a14e2120811c..3fefcc0182c7 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -632,23 +632,28 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

/*
- * ptep_set_wrprotect - mark read-only while trasferring potential hardware
- * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit.
+ * ptep_set_wrprotect - mark read-only while preserving the hardware update of
+ * the Access Flag.
*/
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
{
pte_t old_pte, pte;

+ /*
+ * ptep_set_wrprotect() is only called on CoW mappings which are
+ * private (!VM_SHARED) with the pte either read-only (!PTE_WRITE &&
+ * PTE_RDONLY) or writable and software-dirty (PTE_WRITE &&
+ * !PTE_RDONLY && PTE_DIRTY); see is_cow_mapping() and
+ * protection_map[]. There is no race with the hardware update of the
+ * dirty state: clearing of PTE_RDONLY when PTE_WRITE (a.k.a. PTE_DBM)
+ * is set.
+ */
+ VM_WARN_ONCE(pte_write(*ptep) && !pte_dirty(*ptep),
+ "%s: potential race with hardware DBM", __func__);
do {
- pte = old_pte = READ_ONCE(*ptep);
- /*
- * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY
- * clear), set the PTE_DIRTY bit.
- */
- if (pte_hw_dirty(pte))
- pte = pte_mkdirty(pte);
- pte = pte_wrprotect(pte);
+ old_pte = READ_ONCE(*ptep);
+ pte = pte_wrprotect(old_pte);
} while (cmpxchg_relaxed(&pte_val(*ptep), pte_val(old_pte),
pte_val(pte)) != pte_val(old_pte));
}
Catalin Marinas
2017-07-25 13:53:08 UTC
Since the pte handling for hardware AF/DBM works even when the hardware
feature is not present, make the implementation permanent and remove the
Kconfig option.
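
With the compile-time guard gone, whether hardware AF/DBM is actually used
is decided purely at run time from the ID_AA64MMFR1_EL1.HADBS field, as in
the s2-setup.c and proc.S hunks below. A hedged stand-alone sketch of that
check follows, with read_id_aa64mmfr1() standing in for read_sysreg() and
an assumed field encoding (0 = not implemented, 1 = Access flag only, 2 =
Access flag and dirty state):

#include <stdint.h>
#include <stdio.h>

/* Assumed encoding of the HADBS field, bits [3:0] of ID_AA64MMFR1_EL1. */
#define ID_AA64MMFR1_HADBS_SHIFT        0
#define HADBS_AF                        1
#define HADBS_AF_DBM                    2

/* Stand-in for read_sysreg(id_aa64mmfr1_el1); pretend this is a v8.1 CPU. */
static uint64_t read_id_aa64mmfr1(void)
{
        return (uint64_t)HADBS_AF_DBM << ID_AA64MMFR1_HADBS_SHIFT;
}

static unsigned int hadbs(void)
{
        return (read_id_aa64mmfr1() >> ID_AA64MMFR1_HADBS_SHIFT) & 0xf;
}

int main(void)
{
        /* __cpu_setup() sets TCR_HA (and TCR_HD) based on this field. */
        printf("TCR_HA: %d, TCR_HD: %d\n",
               hadbs() >= HADBS_AF, hadbs() >= HADBS_AF_DBM);
        return 0;
}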

Cc: Will Deacon <***@arm.com>
Cc: Marc Zyngier <***@arm.com>
Cc: Christoffer Dall <***@linaro.org>
Signed-off-by: Catalin Marinas <***@arm.com>
---
arch/arm64/Kconfig | 17 -----------------
arch/arm64/include/asm/pgtable.h | 9 +--------
arch/arm64/kvm/hyp/s2-setup.c | 2 +-
arch/arm64/mm/fault.c | 2 --
arch/arm64/mm/proc.S | 3 +--
5 files changed, 3 insertions(+), 30 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index dfd908630631..eeb61d800441 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -879,23 +879,6 @@ config ARM64_SW_TTBR0_PAN

menu "ARMv8.1 architectural features"

-config ARM64_HW_AFDBM
- bool "Support for hardware updates of the Access and Dirty page flags"
- default y
- help
- The ARMv8.1 architecture extensions introduce support for
- hardware updates of the access and dirty information in page
- table entries. When enabled in TCR_EL1 (HA and HD bits) on
- capable processors, accesses to pages with PTE_AF cleared will
- set this bit instead of raising an access flag fault.
- Similarly, writes to read-only pages with the DBM bit set will
- clear the read-only bit (AP[2]) instead of raising a
- permission fault.
-
- Kernels built with this configuration option enabled continue
- to work on pre-ARMv8.1 hardware and the performance impact is
- minimal. If unsure, say Y.
-
config ARM64_PAN
bool "Enable support for Privileged Access Never (PAN)"
default y
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 3fefcc0182c7..e23811f6b9da 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -85,11 +85,7 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
(__boundary - 1 < (end) - 1) ? __boundary : (end); \
})

-#ifdef CONFIG_ARM64_HW_AFDBM
#define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
-#else
-#define pte_hw_dirty(pte) (0)
-#endif
#define pte_sw_dirty(pte) (!!(pte_val(pte) & PTE_DIRTY))
#define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte))

@@ -228,8 +224,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
* hardware updates of the pte (ptep_set_access_flags safely changes
* valid ptes without going through an invalid entry).
*/
- if (IS_ENABLED(CONFIG_ARM64_HW_AFDBM) &&
- pte_valid(*ptep) && pte_valid(pte)) {
+ if (pte_valid(*ptep) && pte_valid(pte)) {
VM_WARN_ONCE(!pte_young(pte),
"%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
__func__, pte_val(*ptep), pte_val(pte));
@@ -565,7 +560,6 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
return pte_pmd(pte_modify(pmd_pte(pmd), newprot));
}

-#ifdef CONFIG_ARM64_HW_AFDBM
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
extern int ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep,
@@ -666,7 +660,6 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
ptep_set_wrprotect(mm, address, (pte_t *)pmdp);
}
#endif
-#endif /* CONFIG_ARM64_HW_AFDBM */

extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c
index b81f4091c909..a81f5e10fc8c 100644
--- a/arch/arm64/kvm/hyp/s2-setup.c
+++ b/arch/arm64/kvm/hyp/s2-setup.c
@@ -70,7 +70,7 @@ u32 __hyp_text __init_stage2_translation(void)
* Management in ID_AA64MMFR1_EL1 and enable the feature in VTCR_EL2.
*/
tmp = (read_sysreg(id_aa64mmfr1_el1) >> ID_AA64MMFR1_HADBS_SHIFT) & 0xf;
- if (IS_ENABLED(CONFIG_ARM64_HW_AFDBM) && tmp)
+ if (tmp)
val |= VTCR_EL2_HA;

/*
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index cfff98a97a7c..ce361234e78b 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -140,7 +140,6 @@ void show_pte(unsigned long addr)
pr_cont("\n");
}

-#ifdef CONFIG_ARM64_HW_AFDBM
/*
* This function sets the access flags (dirty, accessed), as well as write
* permission, and only to a more permissive setting.
@@ -181,7 +180,6 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
flush_tlb_fix_spurious_fault(vma, address);
return 1;
}
-#endif

static bool is_el1_instruction_abort(unsigned int esr)
{
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 877d42fb0df6..ba82f8bf3cf1 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -234,7 +234,7 @@ ENTRY(__cpu_setup)
*/
mrs x9, ID_AA64MMFR0_EL1
bfi x10, x9, #32, #3
-#ifdef CONFIG_ARM64_HW_AFDBM
+
/*
* Hardware update of the Access and Dirty bits.
*/
@@ -246,7 +246,6 @@ ENTRY(__cpu_setup)
orr x10, x10, #TCR_HD // hardware Dirty flag update
1: orr x10, x10, #TCR_HA // hardware Access flag update
2:
-#endif /* CONFIG_ARM64_HW_AFDBM */
msr tcr_el1, x10
ret // return to head.S
ENDPROC(__cpu_setup)
