Discussion:
[PATCH 0/3] genirq: Add support for "split-EOI" irqchips
Marc Zyngier
2014-10-25 11:06:52 UTC
Permalink
Moderately recent ARM interrupt controllers can use a "split mode" EOI,
where, instead of a single write notifying the controller of the end of
interrupt, the following two steps are used:
- priority-drop: the interrupt is still active, but other interrupts can
  now be taken
- deactivate: the interrupt is not active anymore, and can be taken again.

This makes it very useful for threaded interrupts, as it avoids the
usual mask/unmask dance (and has the potential of being more efficient
on ARM, as it uses the CPU interface instead of the global
distributor, especially when virtualization is in use).

To implement this, a new optional irqchip method is added
(irq_priority_drop). The usual irq_eoi is expected to implement the
deactivate method.

Non-threaded interrupts use these two callbacks back to back, but
threaded ones only perform the irq_priority_drop call in interrupt
context, leaving the irq_eoi call to the thread context (such irqchips
are expected to set the IRQCHIP_EOI_THREADED flag).
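
For illustration only, an irqchip opting into this scheme ends up
declaring something like the sketch below (the callback names mirror the
GICv2 conversion in patch 2; the chip_* helpers are placeholders):

	static struct irq_chip chip_split_eoi = {
		.name			= "SPLIT-EOI",
		.flags			= IRQCHIP_EOI_THREADED,
		.irq_mask		= chip_mask_irq,
		.irq_unmask		= chip_unmask_irq,
		/* interrupt context: drop the priority, other IRQs can fire */
		.irq_priority_drop	= chip_priority_drop_irq,
		/* thread context for threaded IRQs: deactivate the line */
		.irq_eoi		= chip_deactivate_irq,
	};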

I've tried to keep the changes as simple as possible, but I'm afraid
it might be too simple (I'm basically considering that an irq_desc
with the IRQS_ONESHOT flag is a threaded interrupt, which might not
always be true).

This has been tested on a dual Cortex-A7 board, with the ahci driver
using a threaded interrupt handler. The patches are based on my
previously posted irq/irqchip_state branch, and are also available at:
git://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git irq/priority_drop

Marc Zyngier (3):
genirq: Add support for priority-drop/deactivate interrupt controllers
irqchip: GIC: Convert to EOImode == 1
irqchip: GICv3: Convert to EOImode == 1

drivers/irqchip/irq-gic-v3.c | 30 ++++++++++++++++--
drivers/irqchip/irq-gic.c | 64 ++++++++++++++++++++++++++++++++++----
include/linux/irq.h | 1 +
include/linux/irqchip/arm-gic-v3.h | 10 ++++++
include/linux/irqchip/arm-gic.h | 4 +++
kernel/irq/chip.c | 53 +++++++++++++++++++++----------
6 files changed, 138 insertions(+), 24 deletions(-)
--
2.1.0
Marc Zyngier
2014-10-25 11:06:54 UTC
Permalink
So far, GICv2 has been used with EOImode == 0. The effect of this
mode is to perform the priority drop and the deactivation of the
interrupt at the same time.

While this works perfectly for Linux (we only have a single priority),
it causes issues when an interrupt is forwarded to a guest, and when
we want the guest to perform the EOI itself. This is also rather
inefficient for threaded interrupts, as it forces us to hit the
distributor (which is slower, especially when using virtualization).

For this case, the GIC architecture provides EOImode == 1, where:
- A write to the EOI register drops the priority of the interrupt and leaves
  it active. Other interrupts at the same priority level can now be taken,
  but the active interrupt cannot be taken again.
- A write to the DIR register marks the interrupt as inactive, meaning it can
  now be taken again.
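
For reference (a sketch only, not part of the patch), the per-interrupt
sequence on the CPU interface then looks like:

	u32 irqstat = readl_relaxed(cpu_base + GIC_CPU_INTACK);	/* ack, IRQ becomes active */

	/* ... run the handler ... */

	writel_relaxed(irqstat, cpu_base + GIC_CPU_EOI);		/* priority drop */

	/* possibly much later, e.g. from the irq thread: */
	writel_relaxed(irqstat, cpu_base + GIC_CPU_DEACTIVATE);		/* deactivate */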

We only enable this feature when booted in HYP mode. Also, as most device
trees are broken (they report the CPU interface size as 4kB, while the
GICv2 CPU interface size is 8kB), output a warning if we're booted in HYP
mode but the reported region is too small, and keep the feature disabled.

Signed-off-by: Marc Zyngier <***@arm.com>
---
drivers/irqchip/irq-gic.c | 64 +++++++++++++++++++++++++++++++++++++----
include/linux/irqchip/arm-gic.h | 4 +++
2 files changed, 62 insertions(+), 6 deletions(-)

diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index d78169e..53aedc8 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -44,6 +44,7 @@
#include <asm/irq.h>
#include <asm/exception.h>
#include <asm/smp_plat.h>
+#include <asm/virt.h>

#include "irq-gic-common.h"
#include "irqchip.h"
@@ -93,6 +94,10 @@ struct irq_chip gic_arch_extn = {
.irq_set_wake = NULL,
};

+static struct irq_chip *gic_chip;
+
+static bool supports_deactivate = false;
+
#ifndef MAX_GIC_NR
#define MAX_GIC_NR 1
#endif
@@ -192,6 +197,16 @@ static void gic_eoi_irq(struct irq_data *d)
writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_EOI);
}

+static void gic_priority_drop_irq(struct irq_data *d)
+{
+ gic_eoi_irq(d);
+}
+
+static void gic_deactivate_irq(struct irq_data *d)
+{
+ writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_DEACTIVATE);
+}
+
static void gic_irq_set_irqchip_state(struct irq_data *d, int state, int val)
{
u32 reg;
@@ -333,6 +348,8 @@ static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
}
if (irqnr < 16) {
writel_relaxed(irqstat, cpu_base + GIC_CPU_EOI);
+ if (supports_deactivate)
+ writel_relaxed(irqstat, cpu_base + GIC_CPU_DEACTIVATE);
#ifdef CONFIG_SMP
handle_IPI(irqnr, regs);
#endif
@@ -369,7 +386,7 @@ static void gic_handle_cascade_irq(unsigned int irq, struct irq_desc *desc)
chained_irq_exit(chip, desc);
}

-static struct irq_chip gic_chip = {
+static struct irq_chip gicv1_chip = {
.name = "GIC",
.irq_mask = gic_mask_irq,
.irq_unmask = gic_unmask_irq,
@@ -384,6 +401,23 @@ static struct irq_chip gic_chip = {
.irq_set_irqchip_state = gic_irq_set_irqchip_state,
};

+static struct irq_chip gicv2_chip = {
+ .name = "GIC",
+ .flags = IRQCHIP_EOI_THREADED,
+ .irq_mask = gic_mask_irq,
+ .irq_unmask = gic_unmask_irq,
+ .irq_priority_drop = gic_priority_drop_irq,
+ .irq_eoi = gic_deactivate_irq,
+ .irq_set_type = gic_set_type,
+ .irq_retrigger = gic_retrigger,
+#ifdef CONFIG_SMP
+ .irq_set_affinity = gic_set_affinity,
+#endif
+ .irq_set_wake = gic_set_wake,
+ .irq_get_irqchip_state = gic_irq_get_irqchip_state,
+ .irq_set_irqchip_state = gic_irq_set_irqchip_state,
+};
+
void __init gic_cascade_irq(unsigned int gic_nr, unsigned int irq)
{
if (gic_nr >= MAX_GIC_NR)
@@ -415,7 +449,11 @@ static u8 gic_get_cpumask(struct gic_chip_data *gic)
static void gic_cpu_if_up(void)
{
void __iomem *cpu_base = gic_data_cpu_base(&gic_data[0]);
- u32 bypass = 0;
+ u32 bypass;
+ u32 mode = 0;
+
+ if (supports_deactivate)
+ mode = GIC_CPU_CTRL_EOImodeNS;

/*
* Preserve bypass disable bits to be written back later
@@ -423,7 +461,7 @@ static void gic_cpu_if_up(void)
bypass = readl(cpu_base + GIC_CPU_CTRL);
bypass &= GICC_DIS_BYPASS_MASK;

- writel_relaxed(bypass | GICC_ENABLE, cpu_base + GIC_CPU_CTRL);
+ writel_relaxed(bypass | mode | GICC_ENABLE, cpu_base + GIC_CPU_CTRL);
}


@@ -848,11 +886,11 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
{
if (hw < 32) {
irq_set_percpu_devid(irq);
- irq_set_chip_and_handler(irq, &gic_chip,
+ irq_set_chip_and_handler(irq, gic_chip,
handle_percpu_devid_irq);
set_irq_flags(irq, IRQF_VALID | IRQF_NOAUTOEN);
} else {
- irq_set_chip_and_handler(irq, &gic_chip,
+ irq_set_chip_and_handler(irq, gic_chip,
handle_fasteoi_irq);
set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);

@@ -1031,6 +1069,11 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start,

gic_irqs -= hwirq_base; /* calculate # of irqs to allocate */

+ if (!supports_deactivate)
+ gic_chip = &gicv1_chip;
+ else
+ gic_chip = &gicv2_chip;
+
if (of_property_read_u32(node, "arm,routable-irqs",
&nr_routable_irqs)) {
irq_base = irq_alloc_descs(irq_start, 16, gic_irqs,
@@ -1060,7 +1103,7 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start,
set_handle_irq(gic_handle_irq);
}

- gic_chip.flags |= gic_arch_extn.flags;
+ gic_chip->flags |= gic_arch_extn.flags;
gic_dist_init(gic);
gic_cpu_init(gic);
gic_pm_init(gic);
@@ -1074,6 +1117,7 @@ gic_of_init(struct device_node *node, struct device_node *parent)
{
void __iomem *cpu_base;
void __iomem *dist_base;
+ struct resource cpu_res;
u32 percpu_offset;
int irq;

@@ -1086,6 +1130,14 @@ gic_of_init(struct device_node *node, struct device_node *parent)
cpu_base = of_iomap(node, 1);
WARN(!cpu_base, "unable to map gic cpu registers\n");

+ of_address_to_resource(node, 1, &cpu_res);
+ if (is_hyp_mode_available()) {
+ if (resource_size(&cpu_res) >= SZ_8K)
+ supports_deactivate = true;
+ else
+ pr_warn("GIC: CPU interface size is %x, DT is probably wrong\n", (int)resource_size(&cpu_res));
+ }
+
if (of_property_read_u32(node, "cpu-offset", &percpu_offset))
percpu_offset = 0;

diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 13eed92..e8de29a 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -20,9 +20,13 @@
#define GIC_CPU_ALIAS_BINPOINT 0x1c
#define GIC_CPU_ACTIVEPRIO 0xd0
#define GIC_CPU_IDENT 0xfc
+#define GIC_CPU_DEACTIVATE 0x1000

#define GICC_ENABLE 0x1
#define GICC_INT_PRI_THRESHOLD 0xf0
+
+#define GIC_CPU_CTRL_EOImodeNS (1 << 9)
+
#define GICC_IAR_INT_ID_MASK 0x3ff
#define GICC_INT_SPURIOUS 1023
#define GICC_DIS_BYPASS_MASK 0x1e0
--
2.1.0
Marc Zyngier
2014-10-25 11:06:55 UTC
Permalink
So far, GICv3 has been used with EOImode == 0. The effect of this
mode is to perform the priority drop and the deactivation of the
interrupt at the same time.

While this works perfectly for Linux (we only have a single priority),
it causes issues when an interrupt is forwarded to a guest, and when
we want the guest to perform the EOI itself. This is also rather
inefficient for threaded interrupts, as it forces us to hit the
distributor (which is slower, especially when using virtualization).

For this case, the GIC architecture provides EOImode == 1, where:
- A write to ICC_EOIR1_EL1 drops the priority of the interrupt and leaves
  it active. Other interrupts at the same priority level can now be taken,
  but the active interrupt cannot be taken again.
- A write to ICC_DIR_EL1 marks the interrupt as inactive, meaning it can
  now be taken again.
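
In terms of the driver's system register accessors, the sequence then
becomes (a sketch only; gic_read_iar() is the driver's existing IAR
helper, gic_write_dir() is added by this patch):

	u64 irqnr = gic_read_iar();	/* ack: read ICC_IAR1_EL1 */

	/* ... run the handler ... */

	gic_write_eoir(irqnr);		/* priority drop: ICC_EOIR1_EL1 */

	/* possibly much later, e.g. from the irq thread: */
	gic_write_dir(irqnr);		/* deactivate: ICC_DIR_EL1 */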

This patch converts the driver to this new mode when virtualization is
enabled.

Signed-off-by: Marc Zyngier <***@arm.com>
---
drivers/irqchip/irq-gic-v3.c | 30 ++++++++++++++++++++++++++++--
include/linux/irqchip/arm-gic-v3.h | 10 ++++++++++
2 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 666c14e..c7f8a25 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -30,6 +30,7 @@
#include <asm/cputype.h>
#include <asm/exception.h>
#include <asm/smp_plat.h>
+#include <asm/virt.h>

#include "irq-gic-common.h"
#include "irqchip.h"
@@ -45,6 +46,7 @@ struct gic_chip_data {
};

static struct gic_chip_data gic_data __read_mostly;
+static bool supports_deactivate = false;

#define gic_data_rdist() (this_cpu_ptr(gic_data.rdist))
#define gic_data_rdist_rd_base() (*gic_data_rdist())
@@ -289,6 +291,16 @@ static void gic_eoi_irq(struct irq_data *d)
gic_write_eoir(gic_irq(d));
}

+static void gic_priority_drop_irq(struct irq_data *d)
+{
+ gic_write_eoir(gic_irq(d));
+}
+
+static void gic_deactivate_irq(struct irq_data *d)
+{
+ gic_write_dir(gic_irq(d));
+}
+
static int gic_set_type(struct irq_data *d, unsigned int type)
{
unsigned int irq = gic_irq(d);
@@ -345,6 +357,8 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs
}
if (irqnr < 16) {
gic_write_eoir(irqnr);
+ if (supports_deactivate)
+ gic_write_dir(irqnr);
#ifdef CONFIG_SMP
handle_IPI(irqnr, regs);
#else
@@ -441,8 +455,13 @@ static void gic_cpu_sys_reg_init(void)
/* Set priority mask register */
gic_write_pmr(DEFAULT_PMR_VALUE);

- /* EOI deactivates interrupt too (mode 0) */
- gic_write_ctlr(ICC_CTLR_EL1_EOImode_drop_dir);
+ if (supports_deactivate) {
+ /* EOI drops priority only (mode 1) */
+ gic_write_ctlr(ICC_CTLR_EL1_EOImode_drop);
+ } else {
+ /* EOI deactivates interrupt too (mode 0) */
+ gic_write_ctlr(ICC_CTLR_EL1_EOImode_drop_dir);
+ }

/* ... and let's hit the road... */
gic_write_grpen1(1);
@@ -738,6 +757,13 @@ static int __init gic_of_init(struct device_node *node, struct device_node *pare
if (of_property_read_u64(node, "redistributor-stride", &redist_stride))
redist_stride = 0;

+ if (is_hyp_mode_available()) {
+ supports_deactivate = true;
+ gic_chip.flags = IRQCHIP_EOI_THREADED;
+ gic_chip.irq_priority_drop = gic_priority_drop_irq;
+ gic_chip.irq_eoi = gic_deactivate_irq;
+ }
+
gic_data.dist_base = dist_base;
gic_data.redist_base = redist_base;
gic_data.redist_regions = redist_regions;
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 03a4ea3..9875502 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -74,6 +74,9 @@
#define GICR_SYNCR 0x00C0
#define GICR_MOVLPIR 0x0100
#define GICR_MOVALLR 0x0110
+#define GICR_ISACTIVER GICD_ISACTIVER
+#define GICR_ICACTIVER GICD_ICACTIVER
+#define GICR_IDREGS GICD_IDREGS
#define GICR_PIDR2 GICD_PIDR2

#define GICR_WAKER_ProcessorSleep (1U << 1)
@@ -128,6 +131,7 @@
#define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT)

#define ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1)
+#define ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1)
#define ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0)
#define ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5)
#define ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0)
@@ -195,6 +199,12 @@ static inline void gic_write_eoir(u64 irq)
isb();
}

+static inline void gic_write_dir(u64 irq)
+{
+ asm volatile("msr_s " __stringify(ICC_DIR_EL1) ", %0" : : "r" (irq));
+ isb();
+}
+
#endif

#endif
--
2.1.0
Marc Zyngier
2014-10-25 11:06:53 UTC
Permalink
Moderately recent ARM interrupt controllers can use a "split mode" EOI,
where, instead of a single write notifying the controller of the end of
interrupt, the following two steps are used:
- priority-drop: the interrupt is still active, but other interrupts can
  now be taken
- deactivate: the interrupt is not active anymore, and can be taken again.

This makes it very useful for threaded interrupts, as it avoids the usual
mask/unmask dance (and has the potential of being more efficient on ARM,
as it uses the CPU interface instead of the global distributor).

To implement this, a new optional irqchip method is added (irq_priority_drop).
The usual irq_eoi is expected to implement the deactivate method.

Non-threaded interrupts use these two callbacks back to back, but threaded
ones only perform the irq_priority_drop call in interrupt context, leaving
the irq_eoi call to the thread context (such irqchips are expected to set
the IRQCHIP_EOI_THREADED flag).

Signed-off-by: Marc Zyngier <***@arm.com>
---
include/linux/irq.h | 1 +
kernel/irq/chip.c | 53 +++++++++++++++++++++++++++++++++++++----------------
2 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 257d59a..64d3756 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -330,6 +330,7 @@ struct irq_chip {
void (*irq_mask)(struct irq_data *data);
void (*irq_mask_ack)(struct irq_data *data);
void (*irq_unmask)(struct irq_data *data);
+ void (*irq_priority_drop)(struct irq_data *data);
void (*irq_eoi)(struct irq_data *data);

int (*irq_set_affinity)(struct irq_data *data, const struct cpumask *dest, bool force);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index e5202f0..cf9d001 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -272,12 +272,25 @@ void mask_irq(struct irq_desc *desc)
}
}

+static void mask_threaded_irq(struct irq_desc *desc)
+{
+ struct irq_chip *chip = desc->irq_data.chip;
+
+ /* If we can do priority drop, then masking comes for free */
+ if (chip->irq_priority_drop)
+ irq_state_set_masked(desc);
+ else
+ mask_irq(desc);
+}
+
void unmask_irq(struct irq_desc *desc)
{
- if (desc->irq_data.chip->irq_unmask) {
- desc->irq_data.chip->irq_unmask(&desc->irq_data);
+ struct irq_chip *chip = desc->irq_data.chip;
+
+ if (chip->irq_unmask && !chip->irq_priority_drop)
+ chip->irq_unmask(&desc->irq_data);
+ if (chip->irq_unmask || chip->irq_priority_drop)
irq_state_clr_masked(desc);
- }
}

void unmask_threaded_irq(struct irq_desc *desc)
@@ -287,10 +300,7 @@ void unmask_threaded_irq(struct irq_desc *desc)
if (chip->flags & IRQCHIP_EOI_THREADED)
chip->irq_eoi(&desc->irq_data);

- if (chip->irq_unmask) {
- chip->irq_unmask(&desc->irq_data);
- irq_state_clr_masked(desc);
- }
+ unmask_irq(desc);
}

/*
@@ -470,12 +480,24 @@ static inline void preflow_handler(struct irq_desc *desc)
static inline void preflow_handler(struct irq_desc *desc) { }
#endif

+static void eoi_irq(struct irq_desc *desc, struct irq_chip *chip)
+{
+ if (chip->irq_priority_drop)
+ chip->irq_priority_drop(&desc->irq_data);
+ if (chip->irq_eoi)
+ chip->irq_eoi(&desc->irq_data);
+}
+
static void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip)
{
if (!(desc->istate & IRQS_ONESHOT)) {
- chip->irq_eoi(&desc->irq_data);
+ eoi_irq(desc, chip);
return;
}
+
+ if (chip->irq_priority_drop)
+ chip->irq_priority_drop(&desc->irq_data);
+
/*
* We need to unmask in the following cases:
* - Oneshot irq which did not wake the thread (caused by a
@@ -485,7 +507,8 @@ static void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip)
if (!irqd_irq_disabled(&desc->irq_data) &&
irqd_irq_masked(&desc->irq_data) && !desc->threads_oneshot) {
chip->irq_eoi(&desc->irq_data);
- unmask_irq(desc);
+ if (!chip->irq_priority_drop)
+ unmask_irq(desc);
} else if (!(chip->flags & IRQCHIP_EOI_THREADED)) {
chip->irq_eoi(&desc->irq_data);
}
@@ -525,7 +548,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
}

if (desc->istate & IRQS_ONESHOT)
- mask_irq(desc);
+ mask_threaded_irq(desc);

preflow_handler(desc);
handle_irq_event(desc);
@@ -536,7 +559,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
return;
out:
if (!(chip->flags & IRQCHIP_EOI_IF_HANDLED))
- chip->irq_eoi(&desc->irq_data);
+ eoi_irq(desc, chip);
raw_spin_unlock(&desc->lock);
}
EXPORT_SYMBOL_GPL(handle_fasteoi_irq);
@@ -655,7 +678,7 @@ void handle_edge_eoi_irq(unsigned int irq, struct irq_desc *desc)
!irqd_irq_disabled(&desc->irq_data));

out_eoi:
- chip->irq_eoi(&desc->irq_data);
+ eoi_irq(desc, chip);
raw_spin_unlock(&desc->lock);
}
#endif
@@ -679,8 +702,7 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)

handle_irq_event_percpu(desc, desc->action);

- if (chip->irq_eoi)
- chip->irq_eoi(&desc->irq_data);
+ eoi_irq(desc, chip);
}

/**
@@ -711,8 +733,7 @@ void handle_percpu_devid_irq(unsigned int irq, struct irq_desc *desc)
res = action->handler(irq, dev_id);
trace_irq_handler_exit(irq, action, res);

- if (chip->irq_eoi)
- chip->irq_eoi(&desc->irq_data);
+ eoi_irq(desc, chip);
}

void
--
2.1.0
Thomas Gleixner
2014-10-25 20:27:37 UTC
Permalink
On Sat, 25 Oct 2014, Marc Zyngier wrote:

@@ -330,6 +330,7 @@ struct irq_chip {
void (*irq_mask)(struct irq_data *data);
void (*irq_mask_ack)(struct irq_data *data);
void (*irq_unmask)(struct irq_data *data);
+ void (*irq_priority_drop)(struct irq_data *data);

Lacks the docbook comment.
Post by Marc Zyngier
+static void mask_threaded_irq(struct irq_desc *desc)
There is only one caller for this, i.e handle_fasteoi_irq, right? So
this should go to the other eoi handler specific helpers and have eoi
in its name.
Post by Marc Zyngier
+{
+ struct irq_chip *chip = desc->irq_data.chip;
+
+ /* If we can do priority drop, then masking comes for free */
+ if (chip->irq_priority_drop)
+ irq_state_set_masked(desc);
+ else
+ mask_irq(desc);
+}
void unmask_irq(struct irq_desc *desc)
{
- if (desc->irq_data.chip->irq_unmask) {
- desc->irq_data.chip->irq_unmask(&desc->irq_data);
+ struct irq_chip *chip = desc->irq_data.chip;
+
+ if (chip->irq_unmask && !chip->irq_priority_drop)
+ chip->irq_unmask(&desc->irq_data);
I have a hard time to understand that logic. Assume the interrupt
being masked at the hardware level after boot. Now at request_irq()
time what is going to unmask that very interrupt? Ditto for masking
after disable_irq(). Probably not what you really want.
Post by Marc Zyngier
+static void eoi_irq(struct irq_desc *desc, struct irq_chip *chip)
+{
+ if (chip->irq_priority_drop)
+ chip->irq_priority_drop(&desc->irq_data);
+ if (chip->irq_eoi)
+ chip->irq_eoi(&desc->irq_data);
+}
So if you are using that priority drop stuff, you need both calls even
for the non threaded case?
Post by Marc Zyngier
static void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip)
{
if (!(desc->istate & IRQS_ONESHOT)) {
- chip->irq_eoi(&desc->irq_data);
+ eoi_irq(desc, chip);
return;
}
+
+ if (chip->irq_priority_drop)
+ chip->irq_priority_drop(&desc->irq_data);
+
/*
* - Oneshot irq which did not wake the thread (caused by a
@@ -485,7 +507,8 @@ static void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip)
if (!irqd_irq_disabled(&desc->irq_data) &&
irqd_irq_masked(&desc->irq_data) && !desc->threads_oneshot) {
chip->irq_eoi(&desc->irq_data);
- unmask_irq(desc);
+ if (!chip->irq_priority_drop)
+ unmask_irq(desc);
This is really completely obfuscated: Brain starts melting and
spiraling towards some unidentified universe.

Seriously, I don't think it's a good idea to bandaid this
functionality into the existing handle_fasteoi_irq() mechanism. It's
complex enough already.

So what you really want is a separate handler for this. But aside of
adding the drop prio callback you probably want to handle the other
existing callbacks completely differently than for the regular mode of
that irq controller.

Can you please explain detailed how this "priority drop" mode
works?

Thanks,

tglx
Thomas Gleixner
2014-10-25 20:40:43 UTC
Permalink
Post by Thomas Gleixner
Can you please explain detailed how this "priority drop" mode
works?
And I mean how it works from all aspects, not only from handling the
interrupt itself.

Thanks,

tglx
Marc Zyngier
2014-10-27 15:42:16 UTC
Permalink
Hi Thomas,

Thanks for looking into this.
Post by Marc Zyngier
@@ -330,6 +330,7 @@ struct irq_chip {
void (*irq_mask)(struct irq_data *data);
void (*irq_mask_ack)(struct irq_data *data);
void (*irq_unmask)(struct irq_data *data);
+ void (*irq_priority_drop)(struct irq_data *data);
Lacks the docbook comment.
Yup, will add.
Post by Marc Zyngier
Post by Marc Zyngier
+static void mask_threaded_irq(struct irq_desc *desc)
There is only one caller for this, i.e handle_fasteoi_irq, right? So
this should go to the other eoi handler specific helpers and have eoi
in its name.
I was seeing it as the counterpart of unmask_threaded_irq(). But reading
below, you seem to have a very different approach.
Post by Marc Zyngier
Post by Marc Zyngier
+{
+ struct irq_chip *chip = desc->irq_data.chip;
+
+ /* If we can do priority drop, then masking comes for free */
+ if (chip->irq_priority_drop)
+ irq_state_set_masked(desc);
+ else
+ mask_irq(desc);
+}
void unmask_irq(struct irq_desc *desc)
{
- if (desc->irq_data.chip->irq_unmask) {
- desc->irq_data.chip->irq_unmask(&desc->irq_data);
+ struct irq_chip *chip = desc->irq_data.chip;
+
+ if (chip->irq_unmask && !chip->irq_priority_drop)
+ chip->irq_unmask(&desc->irq_data);
I have a hard time to understand that logic. Assume the interrupt
being masked at the hardware level after boot. Now at request_irq()
time what is going to unmask that very interrupt? Ditto for masking
after disable_irq(). Probably not what you really want.
Peering at the code (and assuming I'm finally awake), request_irq() uses
irq_startup() -> irq_enable() -> chip->irq_unmask().

But you're perfectly right, it breaks an independent use of
unmask_irq(), which is pretty bad.
Post by Marc Zyngier
Post by Marc Zyngier
+static void eoi_irq(struct irq_desc *desc, struct irq_chip *chip)
+{
+ if (chip->irq_priority_drop)
+ chip->irq_priority_drop(&desc->irq_data);
+ if (chip->irq_eoi)
+ chip->irq_eoi(&desc->irq_data);
+}
So if you are using that priority drop stuff, you need both calls even
for the non threaded case?
Yes. This is a global property (all interrupt lines for this irqchip are
affected), so even the non-threaded case has to issue both calls.
Post by Marc Zyngier
Post by Marc Zyngier
static void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip)
{
if (!(desc->istate & IRQS_ONESHOT)) {
- chip->irq_eoi(&desc->irq_data);
+ eoi_irq(desc, chip);
return;
}
+
+ if (chip->irq_priority_drop)
+ chip->irq_priority_drop(&desc->irq_data);
+
/*
* - Oneshot irq which did not wake the thread (caused by a
@@ -485,7 +507,8 @@ static void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip)
if (!irqd_irq_disabled(&desc->irq_data) &&
irqd_irq_masked(&desc->irq_data) && !desc->threads_oneshot) {
chip->irq_eoi(&desc->irq_data);
- unmask_irq(desc);
+ if (!chip->irq_priority_drop)
+ unmask_irq(desc);
This is really completely obfuscated: Brain starts melting and
spiraling towards some unidentified universe.
Ah! I'm glad I'm not the only one with that feeling ;-).
Post by Marc Zyngier
Seriously, I don't think it's a good idea to bandaid this
functionality into the existing handle_fasteoi_irq() mechanism. It's
complex enough already.
That was the other option. I may have to duplicate (or tweak)
handle_percpu_devid_irq as well though.
Post by Marc Zyngier
So what you really want is a separate handler for this. But aside of
adding the drop prio callback you probably want to handle the other
existing callbacks completely differently than for the regular mode of
that irq controller.
Can you please explain detailed how this "priority drop" mode
works?
The basics of this mode are pretty simple:
- Interrupt signalled, CPU enters the GIC code
- Read the IAR register, interrupt becomes active:
  -> no other interrupt can be taken
- Run whatever interrupt handler
- Write to the EOI register:
  -> interrupt is still active, and cannot be taken again, but other
     interrupts can now be taken
- Write to the DIR register:
  -> interrupt is now inactive, and can be taken again.

A few interesting things here:
- EOI (which causes priority drop) acts as a mask
- DIR (which causes deactivate) acts as unmask+EOI

To me, it looks like DIR operation is exactly what we need when running
a threaded interrupt with IRQCHIP_EOI_THREADED, saving the whole
mask/unmask that is rather slow on ARM.

With that in mind, I end up mapping mask to priority_drop_irq (write to
EOI), and unmask to eoi_irq (write to DIR). Which is admittedly an
interesting brainfuck when trying to wire it into the existing framework.
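
In other words, something like this (sketch only, reusing the GICv2
callbacks from this series):

	.irq_mask	= gic_priority_drop_irq,	/* EOI: priority drop, acts as a mask */
	.irq_unmask	= gic_deactivate_irq,		/* DIR: deactivate, acts as unmask+EOI */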

So yeah, having a different handler will make it much simpler. My main
concern is how to plug this "elegantly" into the epilogue for a threaded
interrupt (irq_finalize_oneshot).

Thanks,

M.
--
Jazz is not dead. It just smells funny...
Thomas Gleixner
2014-10-28 15:32:07 UTC
Permalink
Post by Marc Zyngier
Post by Thomas Gleixner
Post by Marc Zyngier
+{
+ struct irq_chip *chip = desc->irq_data.chip;
+
+ /* If we can do priority drop, then masking comes for free */
+ if (chip->irq_priority_drop)
+ irq_state_set_masked(desc);
+ else
+ mask_irq(desc);
+}
void unmask_irq(struct irq_desc *desc)
{
- if (desc->irq_data.chip->irq_unmask) {
- desc->irq_data.chip->irq_unmask(&desc->irq_data);
+ struct irq_chip *chip = desc->irq_data.chip;
+
+ if (chip->irq_unmask && !chip->irq_priority_drop)
+ chip->irq_unmask(&desc->irq_data);
I have a hard time to understand that logic. Assume the interrupt
being masked at the hardware level after boot. Now at request_irq()
time what is going to unmask that very interrupt? Ditto for masking
after disable_irq(). Probably not what you really want.
Peering at the code (and assuming I'm finally awake), request_irq() uses
irq_startup() -> irq_enable() -> chip->irq_unmask().
Right. That's the default implementation.
Post by Marc Zyngier
But you're perfectly right, it breaks an independent use of
unmask_irq(), which is pretty bad.
Indeed.
Post by Marc Zyngier
Post by Thomas Gleixner
Post by Marc Zyngier
+static void eoi_irq(struct irq_desc *desc, struct irq_chip *chip)
+{
+ if (chip->irq_priority_drop)
+ chip->irq_priority_drop(&desc->irq_data);
+ if (chip->irq_eoi)
+ chip->irq_eoi(&desc->irq_data);
+}
So if you are using that priority drop stuff, you need both calls even
for the non threaded case?
Yes. This is a global property (all interrupt lines for this irqchip are
affected), so even the non-threaded case has to issue both calls.
Ok.
Post by Marc Zyngier
Post by Thomas Gleixner
Can you please explain detailed how this "priority drop" mode
works?
The basics of this mode are pretty simple:
- Interrupt signalled, CPU enters the GIC code
- Read the IAR register, interrupt becomes active:
  -> no other interrupt can be taken
- Run whatever interrupt handler
- Write to the EOI register:
  -> interrupt is still active, and cannot be taken again, but other
     interrupts can now be taken
- Write to the DIR register:
  -> interrupt is now inactive, and can be taken again.
A few interesting things here:
- EOI (which causes priority drop) acts as a mask
- DIR (which causes deactivate) acts as unmask+EOI
Let me make a few assumptions and correct me if I'm wrong as usual.

1) The startup/shutdown procedure for such an interrupt is the
expensive mask/unmask which you want to avoid for the actual
handling case

2) In case of an actual interrupt the flow (ignoring locking) is:

handle_xxx_irq()

    mask_irq();		/* chip->irq_mask() maps to EOI */

    if (!action || irq_disabled())
        return;

    handle_actions();

    if (irq_threads_active() || irq_disabled())
        return;

    unmask_irq();	/* chip->irq_unmask() maps to DIR */

So that is handle_level_irq() with the chip callbacks being:

irq_startup = gic_unmask
irq_shutdown = gic_mask
irq_unmask = gic_dir
irq_mask = gic_eoi

3) In the threaded case as seen above finalize_oneshot() will call
chip->unmask_irq() which maps to the DIR write and gets things
going again.

4) In the lazy irq disable case if the interrupt fires mask_irq()
[EOI] is good enough to silence it.

Though in the enable_irq() case you cannot rely on the automatic
resend of the interrupt when you unmask [DIR]. So we need to make
sure that even in the level case (dunno whether that's supported in
that mode) we end up calling the irq_retrigger() callback. But
that's rather simple to achieve with a new chip flag.

You might have to look at the suspend/resume implications, but if I
did not miss anything crucial that should be fine as well.

Thanks,

tglx
Marc Zyngier
2014-10-28 19:41:55 UTC
Permalink
Post by Thomas Gleixner
Post by Marc Zyngier
Post by Thomas Gleixner
Post by Marc Zyngier
+{
+ struct irq_chip *chip = desc->irq_data.chip;
+
+ /* If we can do priority drop, then masking comes for free */
+ if (chip->irq_priority_drop)
+ irq_state_set_masked(desc);
+ else
+ mask_irq(desc);
+}
void unmask_irq(struct irq_desc *desc)
{
- if (desc->irq_data.chip->irq_unmask) {
- desc->irq_data.chip->irq_unmask(&desc->irq_data);
+ struct irq_chip *chip = desc->irq_data.chip;
+
+ if (chip->irq_unmask && !chip->irq_priority_drop)
+ chip->irq_unmask(&desc->irq_data);
I have a hard time to understand that logic. Assume the interrupt
being masked at the hardware level after boot. Now at request_irq()
time what is going to unmask that very interrupt? Ditto for masking
after disable_irq(). Probably not what you really want.
Peering at the code (and assuming I'm finally awake), request_irq() uses
irq_startup() -> irq_enable() -> chip->irq_unmask().
Right. That's the default implementation.
Post by Marc Zyngier
But you're perfectly right, it breaks an independent use of
unmask_irq(), which is pretty bad.
Indeed.
Post by Marc Zyngier
Post by Thomas Gleixner
Post by Marc Zyngier
+static void eoi_irq(struct irq_desc *desc, struct irq_chip *chip)
+{
+ if (chip->irq_priority_drop)
+ chip->irq_priority_drop(&desc->irq_data);
+ if (chip->irq_eoi)
+ chip->irq_eoi(&desc->irq_data);
+}
So if you are using that priority drop stuff, you need both calls even
for the non threaded case?
Yes. This is a global property (all interrupt lines for this irqchip are
affected), so even the non-threaded case has to issue both calls.
Ok.
Post by Marc Zyngier
Post by Thomas Gleixner
Can you please explain detailed how this "priority drop" mode
works?
The basics of this mode are pretty simple:
- Interrupt signalled, CPU enters the GIC code
- Read the IAR register, interrupt becomes active:
  -> no other interrupt can be taken
- Run whatever interrupt handler
- Write to the EOI register:
  -> interrupt is still active, and cannot be taken again, but other
     interrupts can now be taken
- Write to the DIR register:
  -> interrupt is now inactive, and can be taken again.
A few interesting things here:
- EOI (which causes priority drop) acts as a mask
- DIR (which causes deactivate) acts as unmask+EOI
Let me make a few assumptions and correct me if I'm wrong as usual.
1) The startup/shutdown procedure for such an interrupt is the
expensive mask/unmask which you want to avoid for the actual
handling case
Indeed.
Post by Thomas Gleixner
handle_xxx_irq()
mask_irq(); /* chip->irq_mask() maps to EOI */
if (!action || irq_disabled())
return;
handle_actions();
if (irq_threads_active() || irq_disabled())
return;
unmask_irq(); /* chip->irq_unmask() maps to DIR */
irq_startup = gic_unmask
irq_shutdown = gic_mask
irq_unmask = gic_dir
irq_mask = gic_eoi
So while this works really well for the interrupt handling part, it will
break [un]mask_irq(). This is because you can only write to EOI for an
interrupt that you have ACKed just before (anything else and the GIC
state machine goes crazy). Basically, any use for EOI/DIR outside of the
interrupt context itself (hardirq or thread) is really dangerous.

If we had a flag like IRQCHIP_UNMASK_IS_STARTUP, we could distinguish
this particular case, but that's borderline ugly.
Post by Thomas Gleixner
3) In the threaded case as seen above finalize_oneshot() will call
chip->unmask_irq() which maps to the DIR write and gets things
going again.
Yup.
Post by Thomas Gleixner
4) In the lazy irq disable case if the interrupt fires mask_irq()
[EOI] is good enough to silence it.
Though in the enable_irq() case you cannot rely on the automatic
resend of the interrupt when you unmask [DIR]. So we need to make
sure that even in the level case (dunno whether that's supported in
that mode) we end up calling the irq_retrigger() callback. But
that's rather simple to achieve with a new chip flag.
I think this one breaks for the same reason as above. And an interrupt
masked with EOI cannot easily be restarted without clearing the ACTIVE
bit (and everything becomes even more of a complete madness).

I need to think about it again.

Thanks,

M.
--
Jazz is not dead. It just smells funny...
Thomas Gleixner
2014-10-28 20:14:40 UTC
Permalink
Post by Thomas Gleixner
Post by Thomas Gleixner
Let me make a few assumptions and correct me if I'm wrong as usual.
1) The startup/shutdown procedure for such an interrupt is the
expensive mask/unmask which you want to avoid for the actual
handling case
Indeed.
Post by Thomas Gleixner
handle_xxx_irq()
mask_irq(); /* chip->irq_mask() maps to EOI */
if (!action || irq_disabled())
return;
handle_actions();
if (irq_threads_active() || irq_disabled())
return;
unmask_irq(); /* chip->irq_unmask() maps to DIR */
irq_startup = gic_unmask
irq_shutdown = gic_mask
irq_unmask = gic_dir
irq_mask = gic_eoi
So while this works really well for the interrupt handling part, it will
break [un]mask_irq(). This is because you can only write to EOI for an
interrupt that you have ACKed just before (anything else and the GIC
state machine goes crazy). Basically, any use for EOI/DIR outside of the
interrupt context itself (hardirq or thread) is really dangerous.
I really doubt that the DIR invocation is dangerous outside of
interrupt context. Otherwise your threaded scheme would not work at
all as the DIR invocation happens in thread context.

The nice thing about the lazy irq disable code is that the irq_mask(),
i.e. EOI, invocation actually happens always in hard interrupt
context. We should never invoke irq_mask() from any other context if
you supply a startup/shutdown function.
Post by Thomas Gleixner
If we had a flag like IRQCHIP_UNMASK_IS_STARTUP, we could distinguish
this particular case, but that's borderline ugly.
Indeed. But I don't think it is required. See also below.
Post by Thomas Gleixner
Post by Thomas Gleixner
4) In the lazy irq disable case if the interrupt fires mask_irq()
[EOI] is good enough to silence it.
Though in the enable_irq() case you cannot rely on the automatic
resend of the interrupt when you unmask [DIR]. So we need to make
sure that even in the level case (dunno whether that's supported in
that mode) we end up calling the irq_retrigger() callback. But
that's rather simple to achieve with a new chip flag.
I think this one breaks for the same reason as above. And an interrupt
masked with EOI cannot easily be restarted without clearing the ACTIVE
bit (and everything becomes even more of a complete madness).
So we already established that irq_mask()/EOI will only be called from
the actual interrupt context and irq_unmask()/DIR must be safe to be
called from any context in order to make the EOI/DIR based threaded
optimization work.

So the only interesting code path is enable_irq() which invokes
irq_enable() and then the resend/retrigger machinery.

irq_enable() calls chip->irq_unmask(), i.e. DIR. So that clears the
ACTIVE bit and then the IRQ either gets resent by hardware (in case of
level as the device interrupt is still active) or retriggered by the
irq_retrigger() callback.

I might be wrong as usual, but if there is any restriction on DIR
versus the invocation context, your whole optimization scheme is hosed
anyway.

Thanks

tglx
Marc Zyngier
2014-10-29 10:11:45 UTC
Permalink
Post by Thomas Gleixner
Post by Thomas Gleixner
Post by Thomas Gleixner
Let me make a few assumptions and correct me if I'm wrong as usual.
1) The startup/shutdown procedure for such an interrupt is the
expensive mask/unmask which you want to avoid for the actual
handling case
Indeed.
Post by Thomas Gleixner
handle_xxx_irq()
mask_irq(); /* chip->irq_mask() maps to EOI */
if (!action || irq_disabled())
return;
handle_actions();
if (irq_threads_active() || irq_disabled())
return;
unmask_irq(); /* chip->irq_unmask() maps to DIR */
irq_startup = gic_unmask
irq_shutdown = gic_mask
irq_unmask = gic_dir
irq_mask = gic_eoi
So while this works really well for the interrupt handling part, it will
break [un]mask_irq(). This is because you can only write to EOI for an
interrupt that you have ACKed just before (anything else and the GIC
state machine goes crazy). Basically, any use for EOI/DIR outside of the
interrupt context itself (hardirq or thread) is really dangerous.
I really doubt that the DIR invocation is dangerous outside of
interrupt context. Otherwise your threaded scheme would not work at
all as the DIR invocation happens in thread context.
There is a small restriction in the use of DIR (quoting the spec):

"If the interrupt identified in the GICC_DIR is not active, and is not a
spurious interrupt, the effect of the register write is UNPREDICTABLE.
This means any GICC_DIR write must identify an interrupt for which there
has been a valid GICC_EOIR or GICC_AEOIR write."

I think that affects the irq_enable case you describe below.
Post by Thomas Gleixner
The nice thing about the lazy irq disable code is that the irq_mask(),
i.e. EOI, invocation actually happens always in hard interrupt
context. We should never invoke irq_mask() from any other context if
you supply a startup/shutdown function.
Post by Thomas Gleixner
If we had a flag like IRQCHIP_UNMASK_IS_STARTUP, we could distinguish
this particular case, but that's borderline ugly.
Indeed. But I don't think it is required. See also below.
Post by Thomas Gleixner
Post by Thomas Gleixner
4) In the lazy irq disable case if the interrupt fires mask_irq()
[EOI] is good enough to silence it.
Though in the enable_irq() case you cannot rely on the automatic
resend of the interrupt when you unmask [DIR]. So we need to make
sure that even in the level case (dunno whether that's supported in
that mode) we end up calling the irq_retrigger() callback. But
that's rather simple to achieve with a new chip flag.
I think this one breaks for the same reason as above. And an interrupt
masked with EOI cannot easily be restarted without clearing the ACTIVE
bit (and everything becomes even more of a complete madness).
So we already established that irq_mask()/EOI will only be called from
the actual interrupt context and irq_unmask()/DIR must be safe to be
called from any context in order to make the EOI/DIR based threaded
optimization work.
So the only interesting code path is enable_irq() which invokes
irq_enable() and then the resend/retrigger machinery.
irq_enable() calls chip->irq_unmask(), i.e. DIR. So that clears the
ACTIVE bit and then the IRQ either gets resent by hardware (in case of
level as the device interrupt is still active) or retriggered by the
irq_retrigger() callback.
The problem I see here is for an interrupt that has been flagged as
disabled with irq_disabled(), but that hasn't fired. We'd end up doing a
DIR on something that hasn't had an EOI first. I think that's the only
wrinkle in this scheme.

I'll implement something today; that will help me think it through.

Thanks,

M.
--
Jazz is not dead. It just smells funny...
Thomas Gleixner
2014-10-29 10:26:24 UTC
Permalink
Post by Marc Zyngier
Post by Thomas Gleixner
irq_enable() calls chip->irq_unmask(), i.e. DIR. So that clears the
ACTIVE bit and then the IRQ either gets resent by hardware (in case of
level as the device interrupt is still active) or retriggered by the
irq_retrigger() callback.
The problem I see here is for an interrupt that has been flagged as
disabled with irq_disabled(), but that hasn't fired. We'd end up doing a
DIR on something that hasn't had an EOI first. I think that's the only
wrinkle in this scheme.
Right. So the untested patch below should do the trick and prevent
irq_enable() from invoking irq_unmask() if the interrupt is not flagged
masked. And it can only be flagged masked if it was masked in the
handler. The startup callback will make sure that irq_enable() is not
invoked at startup time.

Thanks,

tglx

------------------

diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index e5202f00cabc..9c0f73e1994a 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -182,7 +182,7 @@ int irq_startup(struct irq_desc *desc, bool resend)
ret = desc->irq_data.chip->irq_startup(&desc->irq_data);
irq_state_clr_masked(desc);
} else {
- irq_enable(desc);
+ irq_enable(desc, false);
}
if (resend)
check_irq_resend(desc, desc->irq_data.irq);
@@ -202,13 +202,14 @@ void irq_shutdown(struct irq_desc *desc)
irq_state_set_masked(desc);
}

-void irq_enable(struct irq_desc *desc)
+void irq_enable(struct irq_desc *desc, bool ifmasked)
{
irq_state_clr_disabled(desc);
- if (desc->irq_data.chip->irq_enable)
+ if (desc->irq_data.chip->irq_enable) {
desc->irq_data.chip->irq_enable(&desc->irq_data);
- else
+ } else if (!ifmasked || irqd_irq_masked(&desc->irq_data)) {
desc->irq_data.chip->irq_unmask(&desc->irq_data);
+ }
irq_state_clr_masked(desc);
}

diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 4332d766619d..6eff2678cf6d 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -68,7 +68,7 @@ extern void __enable_irq(struct irq_desc *desc, unsigned int irq);

extern int irq_startup(struct irq_desc *desc, bool resend);
extern void irq_shutdown(struct irq_desc *desc);
-extern void irq_enable(struct irq_desc *desc);
+extern void irq_enable(struct irq_desc *desc, bool ifmasked);
extern void irq_disable(struct irq_desc *desc);
extern void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu);
extern void irq_percpu_disable(struct irq_desc *desc, unsigned int cpu);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 0a9104b4608b..d8c474608c2d 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -448,7 +448,7 @@ void __enable_irq(struct irq_desc *desc, unsigned int irq)
goto err_out;
/* Prevent probing on this irq: */
irq_settings_set_noprobe(desc);
- irq_enable(desc);
+ irq_enable(desc, true);
check_irq_resend(desc, irq);
/* fall-through */
}
Marc Zyngier
2014-10-30 14:15:32 UTC
Permalink
Post by Thomas Gleixner
Post by Marc Zyngier
Post by Thomas Gleixner
irq_enable() calls chip->irq_unmask(), i.e. DIR. So that clears the
ACTIVE bit and then the IRQ either gets resent by hardware (in case of
level as the device interrupt is still active) or retriggered by the
irq_retrigger() callback.
The problem I see here is for an interrupt that has been flagged as
disabled with irq_disabled(), but that hasn't fired. We'd end up doing a
DIR on something that hasn't had an EOI first. I think that's the only
wrinkle in this scheme.
Right. So the untested patch below should do the trick and prevent
irq_enable() from invoking irq_unmask() if the interrupt is not flagged
masked. And it can only be flagged masked if it was masked in the
handler. The startup callback will make sure that irq_enable() is not
invoked at startup time.
Thanks,
tglx
------------------
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index e5202f00cabc..9c0f73e1994a 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -182,7 +182,7 @@ int irq_startup(struct irq_desc *desc, bool resend)
ret = desc->irq_data.chip->irq_startup(&desc->irq_data);
irq_state_clr_masked(desc);
} else {
- irq_enable(desc);
+ irq_enable(desc, false);
}
if (resend)
check_irq_resend(desc, desc->irq_data.irq);
@@ -202,13 +202,14 @@ void irq_shutdown(struct irq_desc *desc)
irq_state_set_masked(desc);
}
-void irq_enable(struct irq_desc *desc)
+void irq_enable(struct irq_desc *desc, bool ifmasked)
{
irq_state_clr_disabled(desc);
- if (desc->irq_data.chip->irq_enable)
+ if (desc->irq_data.chip->irq_enable) {
desc->irq_data.chip->irq_enable(&desc->irq_data);
- else
+ } else if (!ifmasked || irqd_irq_masked(&desc->irq_data)) {
desc->irq_data.chip->irq_unmask(&desc->irq_data);
+ }
irq_state_clr_masked(desc);
}
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 4332d766619d..6eff2678cf6d 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -68,7 +68,7 @@ extern void __enable_irq(struct irq_desc *desc, unsigned int irq);
extern int irq_startup(struct irq_desc *desc, bool resend);
extern void irq_shutdown(struct irq_desc *desc);
-extern void irq_enable(struct irq_desc *desc);
+extern void irq_enable(struct irq_desc *desc, bool ifmasked);
extern void irq_disable(struct irq_desc *desc);
extern void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu);
extern void irq_percpu_disable(struct irq_desc *desc, unsigned int cpu);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 0a9104b4608b..d8c474608c2d 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -448,7 +448,7 @@ void __enable_irq(struct irq_desc *desc, unsigned int irq)
goto err_out;
/* Prevent probing on this irq: */
irq_settings_set_noprobe(desc);
- irq_enable(desc);
+ irq_enable(desc, true);
check_irq_resend(desc, irq);
/* fall-through */
}
So I actually implemented this, and did hit another snag: per cpu interrupts.
They don't use the startup/shutdown methods, and reproducing the above logic
on a per-cpu basis is not very pretty.

In order to make some progress, I went on a slightly different path, which
is to use enable/disable instead of startup/shutdown. As far as I can see,
the only thing we lose by doing so is the lazy disable, but the code
becomes very straightforward (see below). I gave it a go on an ARMv7 box,
and it even survived.

Thoughts?

M.

From fee4719e3bd82536bf72a62aab619b873ed1b6f0 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <***@arm.com>
Date: Thu, 23 Oct 2014 09:25:47 +0100
Subject: [PATCH] genirq: Add support for priority-drop/deactivate interrupt
controllers

Moderately recent ARM interrupt controllers can use a "split mode" EOI,
where, instead of a single write notifying the controller of the end of
interrupt, the following two steps are used:
- priority-drop: the interrupt is still active, but other interrupts can
  now be taken (basically the equivalent of a mask)
- deactivate: the interrupt is not active anymore, and can be taken again
  (equivalent to unmask+eoi).

This makes it very useful for threaded interrupts, as it avoids the usual
mask/unmask dance (and has the potential of being more efficient on ARM,
as it uses the CPU interface instead of the global distributor).

This patch implements a couple of new interrupt flows:
- handle_spliteoi_irq: this is the direct equivalent of handle_fasteoi_irq,
- handle_spliteoi_percpu_devid_irq: equivalent to handle_percpu_devid_irq.

It is expected that irqchips using these flows will implement something like:

.irq_enable = irqchip_unmask_irq,
.irq_disable = irqchip_mask_irq,
.irq_mask = irqchip_priority_drop_irq,
.irq_unmask = irqchip_deactivate_irq,
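
A driver wiring up such an irqchip would then pick the new flow with
something like the line below (a sketch, not part of this patch; "my_chip"
is a placeholder):

	irq_set_chip_and_handler(irq, &my_chip, handle_spliteoi_irq);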

Signed-off-by: Marc Zyngier <***@arm.com>
---
include/linux/irq.h | 2 ++
kernel/irq/chip.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++++-----
2 files changed, 84 insertions(+), 8 deletions(-)

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 165fac0..0887634 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -444,11 +444,13 @@ static inline int irq_set_parent(int irq, int parent_irq)
*/
extern void handle_level_irq(unsigned int irq, struct irq_desc *desc);
extern void handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc);
+extern void handle_spliteoi_irq(unsigned int irq, struct irq_desc *desc);
extern void handle_edge_irq(unsigned int irq, struct irq_desc *desc);
extern void handle_edge_eoi_irq(unsigned int irq, struct irq_desc *desc);
extern void handle_simple_irq(unsigned int irq, struct irq_desc *desc);
extern void handle_percpu_irq(unsigned int irq, struct irq_desc *desc);
extern void handle_percpu_devid_irq(unsigned int irq, struct irq_desc *desc);
+extern void handle_spliteoi_percpu_devid_irq(unsigned int irq, struct irq_desc *desc);
extern void handle_bad_irq(unsigned int irq, struct irq_desc *desc);
extern void handle_nested_irq(unsigned int irq);

diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index e5202f0..84d2162 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -542,6 +542,56 @@ out:
EXPORT_SYMBOL_GPL(handle_fasteoi_irq);

/**
+ * handle_spliteoi_irq - irq handler for 2-phase-eoi controllers
+ * @irq: the interrupt number
+ * @desc: the interrupt description structure for this irq
+ *
+ * This relies on mask being a very cheap operation, and on
+ * unmask performing both unmask+EOI. This avoids additional
+ * operations for threaded interrupts (typically ARM's GICv2/v3).
+ */
+void
+handle_spliteoi_irq(unsigned int irq, struct irq_desc *desc)
+{
+ raw_spin_lock(&desc->lock);
+
+ if (!irq_may_run(desc))
+ goto out;
+
+ desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
+ kstat_incr_irqs_this_cpu(irq, desc);
+
+ /* Mark the IRQ as in progress */
+ mask_irq(desc);
+
+ /*
+ * If it's disabled or no action available
+ * then just get out of here:
+ */
+ if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) {
+ desc->istate |= IRQS_PENDING;
+ goto out_unmask;
+ }
+
+ handle_irq_event(desc);
+
+ /* In case the handler itself disabled it */
+ if (irqd_irq_disabled(&desc->irq_data))
+ goto out_unmask;
+
+ /* If the thread is running, leave the IRQ in progress */
+ if (atomic_read(&desc->threads_active))
+ goto out;
+
+out_unmask:
+ /* Terminate the handling */
+ unmask_irq(desc);
+out:
+ raw_spin_unlock(&desc->lock);
+}
+EXPORT_SYMBOL_GPL(handle_spliteoi_irq);
+
+/**
* handle_edge_irq - edge type IRQ handler
* @irq: the interrupt number
* @desc: the interrupt description structure for this irq
@@ -683,6 +733,19 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
chip->irq_eoi(&desc->irq_data);
}

+static void __handle_percpu_devid_irq(unsigned int irq, struct irq_desc *desc)
+{
+ struct irqaction *action = desc->action;
+ void *dev_id = raw_cpu_ptr(action->percpu_dev_id);
+ irqreturn_t res;
+
+ kstat_incr_irqs_this_cpu(irq, desc);
+
+ trace_irq_handler_entry(irq, action);
+ res = action->handler(irq, dev_id);
+ trace_irq_handler_exit(irq, action, res);
+}
+
/**
* handle_percpu_devid_irq - Per CPU local irq handler with per cpu dev ids
* @irq: the interrupt number
@@ -698,23 +761,34 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
void handle_percpu_devid_irq(unsigned int irq, struct irq_desc *desc)
{
struct irq_chip *chip = irq_desc_get_chip(desc);
- struct irqaction *action = desc->action;
- void *dev_id = raw_cpu_ptr(action->percpu_dev_id);
- irqreturn_t res;
-
- kstat_incr_irqs_this_cpu(irq, desc);

if (chip->irq_ack)
chip->irq_ack(&desc->irq_data);

- trace_irq_handler_entry(irq, action);
- res = action->handler(irq, dev_id);
- trace_irq_handler_exit(irq, action, res);
+ __handle_percpu_devid_irq(irq, desc);

if (chip->irq_eoi)
chip->irq_eoi(&desc->irq_data);
}

+/**
+ * handle_spliteoi_percpu_devid_irq - Per CPU local irq handler with per cpu dev ids
+ * @irq: the interrupt number
+ * @desc: the interrupt description structure for this irq
+ *
+ * Per CPU interrupts on SMP machines without locking requirements.
+ * Same as handle_percpu_devid_irq() above, but using the 2-phase-eoi
+ * model for the handling.
+ */
+void handle_spliteoi_percpu_devid_irq(unsigned int irq, struct irq_desc *desc)
+{
+ mask_irq(desc);
+
+ __handle_percpu_devid_irq(irq, desc);
+
+ unmask_irq(desc);
+}
+
void
__irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
const char *name)
--
2.0.4
--
Jazz is not dead. It just smells funny...
Thomas Gleixner
2014-10-30 15:59:02 UTC
Permalink
Post by Marc Zyngier
So I actually implemented this, and did hit another snag: per cpu interrupts.
They don't use the startup/shutdown methods, and reproducing the above logic
on a per-cpu basis is not very pretty.
Hmm. Have not looked at the percpu stuff yet.
Post by Marc Zyngier
/**
+ * handle_spliteoi_irq - irq handler for 2-phase-eoi controllers
+ *
+ * This relies on mask being a very cheap operation, and on
+ * unmask performing both unmask+EOI. This avoids additional
+ * operations for threaded interrupts (typically ARM's GICv2/v3).
+ */
+void
+handle_spliteoi_irq(unsigned int irq, struct irq_desc *desc)
+{
+ raw_spin_lock(&desc->lock);
+
+ if (!irq_may_run(desc))
+ goto out;
+
+ desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
+ kstat_incr_irqs_this_cpu(irq, desc);
+
+ /* Mark the IRQ as in progress */
+ mask_irq(desc);
+
+ /*
+ * If it's disabled or no action available
+ */
+ if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) {
+ desc->istate |= IRQS_PENDING;
+ goto out_unmask;
If this handler is used with the lazy disable approach then this goto
causes an irq storm if the interrupt stays active (LEVEL).

So this relies on irq_disable() actually disabling the interrupt at
the hardware level. That really wants a big fat comment if we take
this approach.

Now there is another issue. Assume the following:

CPU 0                             CPU 1

handle_spliteoi_irq()
  mask_irq();
  handle_event();
    wake_thread();
  return;
                                  run_thread()
                                    call_handler();
                                      disable_irq()
                                        irq_disable()
                                    finalize_oneshot()
                                      if (disabled)
                                        return;

So that particular interrupt never gets acknowledged with a write to
DIR.

What happens if you enable it again at the hardware level via
enable_irq()? Is it still in dropped priority mode and waits for the
write to DIR forever? That's what I tried to avoid with my approach.

Thanks,

tglx
