From 4650c63c53cb450a34ff8d433ca287a2c1434fd8 Mon Sep 17 00:00:00 2001
From: Mike Galbraith
Date: Thu, 30 Dec 2010 05:04:24 +0100
Subject: [PATCH] sched, cgroup: Use exit hook to avoid use-after-free crash

On Thu, 2010-12-30 at 00:07 +0100, Miklos Vajna wrote:
> On Wed, Dec 29, 2010 at 04:25:22PM +0100, Ingo Molnar wrote:
> > I tried this patch, but it causes a boot crash:
>
> Hm, indeed. (I get a crash in qemu, but not on the host machine.)
>
> qemu -enable-kvm -kernel kernel-build/arch/x86/boot/bzImage -append "root=/dev/sda1 debug sched_debug ignore_loglevel sysrq_always_enabled console=ttyS0 init=/bin/systemd" -hda systemd.img -serial stdio -m 1G -vnc :0
>
> does not crash here, but
>
> qemu -enable-kvm -kernel kernel-build/arch/x86/boot/bzImage -append "root=/dev/sda1 debug sched_debug ignore_loglevel sysrq_always_enabled console=ttyS0" -hda systemd.img -serial stdio -m 1G -vnc :0
>
> does.
>
> I'm attaching the config (what I already sent earlier in this thread)
> and the output of the above two commands just in case that helps Peter.

Can you try the below? I originally cobbled this fugly thing together as a
diagnostic, but it may still be interesting to see if it stops the kaboom.
Seems like cgroup refcounting must be buggy, though.

Too bad I aborted my dog-slow systemd.img download (started again, ETA
9h45m ;) when Peter posted, or I'd just try it myself right now.
---
 kernel/sched.c |   54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 54 insertions(+), 0 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index ab869f7..b6c7372 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -8706,6 +8706,59 @@ cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp)
 	return &tg->css;
 }
 
+/*
+ * ->pre_destroy callback for the cpu subsystem: report whether the task
+ * group behind @cgrp still looks busy on any CPU.
+ *
+ * With task_group_lock held and interrupts off, walk every possible CPU
+ * and, under that CPU's rq->lock, inspect the group's per-CPU entities:
+ *  - fair: the entity is on_rq or carries a non-zero load weight
+ *  - rt:   the entity's own queue (my_q) has rt_nr_running != 0
+ *
+ * Returns 0 if no CPU matches, -EBUSY at the first one that does.
+ * NOTE(review): presumably the cgroup core refuses the removal on a
+ * non-zero return - confirm against the ->pre_destroy contract.
+ */
+static int
+cpu_cgroup_pre_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+	struct task_group *tg = cgroup_tg(cgrp);
+	unsigned long flags;
+	int ret = 0;
+	int cpu;
+
+	spin_lock_irqsave(&task_group_lock, flags);
+	for_each_possible_cpu(cpu) {
+		struct rq *rq = cpu_rq(cpu);
+#ifdef CONFIG_FAIR_GROUP_SCHED
+		struct sched_entity *cfs_se = tg->se[cpu];
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
+		struct sched_rt_entity *rt_ent = tg->rt_se[cpu];
+#endif
+		int busy = 0;
+
+		/* Sample under the lock, decide after the single unlock. */
+		raw_spin_lock(&rq->lock);
+#ifdef CONFIG_FAIR_GROUP_SCHED
+		busy = cfs_se->on_rq || cfs_se->load.weight;
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
+		/* || short-circuits: rt is not looked at once busy. */
+		busy = busy || rt_ent->my_q->rt_nr_running;
+#endif
+		raw_spin_unlock(&rq->lock);
+
+		if (busy) {
+			ret = -EBUSY;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&task_group_lock, flags);
+
+	return ret;
+}
+
 static void
 cpu_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
 {
@@ -8841,6 +8894,7 @@ static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
 struct cgroup_subsys cpu_cgroup_subsys = {
 	.name		= "cpu",
 	.create		= cpu_cgroup_create,
+	.pre_destroy	= cpu_cgroup_pre_destroy,
 	.destroy	= cpu_cgroup_destroy,
 	.can_attach	= cpu_cgroup_can_attach,
 	.attach		= cpu_cgroup_attach,
-- 
1.7.3.4