我們在分析Linux內核中斷剖析時,簡單地聊了一下SOFTIRQ,而沒有進行深入分析。Linux內核將對一個外部設備中斷的處理分成兩大部分:HARDIRQ以及SOFTIRQ。HARDIRQ部分在執行時處理器的中斷是關閉的,所以驅動程序的中斷處理例程只應該完成一些關鍵的中斷操作,而將耗時的操作放到SOFTIRQ部分執行。本篇文章我們將對這部分進行深入討論。
SoftIrq的應用非常廣泛,例如我們常見的網卡在做網絡包的收發、封裝好用來做延遲操作的tasklet的實現等。
SoftIrq源代碼分析
首先看一下Linux內核當中的softirq類型,而在softirq中維護著struct softirq_action softirq_vec[NR_SOFTIRQS]這樣一個類型的數組。
//include/linux/interrupt.h
/*
 * Softirq numbers. The values are spelled out explicitly; NR_SOFTIRQS is the
 * count of softirq types and sizes softirq_vec[].
 */
enum
{
	HI_SOFTIRQ       = 0,
	TIMER_SOFTIRQ    = 1,
	NET_TX_SOFTIRQ   = 2,	/* network port TX */
	NET_RX_SOFTIRQ   = 3,	/* network port RX */
	BLOCK_SOFTIRQ    = 4,
	IRQ_POLL_SOFTIRQ = 5,
	TASKLET_SOFTIRQ  = 6,	/* softirq used by the tasklet implementation */
	SCHED_SOFTIRQ    = 7,
	HRTIMER_SOFTIRQ  = 8,	/* Unused, but kept as tools rely on the numbering. Sigh! */
	RCU_SOFTIRQ      = 9,	/* Preferable RCU should always be the last softirq */

	NR_SOFTIRQS      = 10
};
在start_kernel的時候,在做完中斷、時間等初始化後,會進行softirq的初始化動作:
//init/main.c
/*
 * Abridged excerpt of start_kernel(): each "..." line stands for code elided
 * by the article. It is shown only to place softirq_init() after the
 * init_IRQ(), init_timers() and hrtimers_init() calls.
 */
asmlinkage __visible void __init start_kernel(void){
...
init_IRQ();
...
init_timers();
hrtimers_init();
softirq_init(); // softirq initialisation
...
}
softirq_init的動作很簡單,做了兩件事情。第一件事情:為每個核分別創建了tasklet_vec(對應TASKLET_SOFTIRQ類型軟中斷)和tasklet_hi_vec(對應HI_SOFTIRQ類型軟中斷)鏈表。第二件事情就是給數組softirq_vec[NR_SOFTIRQS]中TASKLET_SOFTIRQ和HI_SOFTIRQ兩種類型的softirq_action初始化各自的回調處理函數。
//kernel/softirq.c
static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
void open_softirq(int nr, void (*action)(struct softirq_action *))
{
softirq_vec[nr].action = action;
}
/*
 * softirq_init - set up the per-CPU tasklet lists and register the two
 * tasklet softirq handlers.
 */
void __init softirq_init(void)
{
	int cpu_id;

	/*
	 * Step 1: for every possible CPU, point the tail of both tasklet
	 * lists at that list's own head field.
	 */
	for_each_possible_cpu(cpu_id) {
		per_cpu(tasklet_hi_vec, cpu_id).tail = &per_cpu(tasklet_hi_vec, cpu_id).head;
		per_cpu(tasklet_vec, cpu_id).tail = &per_cpu(tasklet_vec, cpu_id).head;
	}

	/* Step 2: register the callbacks for the two tasklet softirq types. */
	open_softirq(HI_SOFTIRQ, tasklet_hi_action);
	open_softirq(TASKLET_SOFTIRQ, tasklet_action);
}
接下來我們重點看一下softirq的處理核心函數__do_softirq。它被調用的時間點是:處理完中斷函數後會調用irq_exit(關於這點不清楚的可以回顧一下之前的文章:《Linux內核中斷剖析--外部中斷(上)》),在irq_exit中進行處理,這裡就不再做過多介紹。我們直接分析核心函數__do_softirq。
//kernel/softirq.c
/*
 * irq_exit - bookkeeping done when leaving hard-interrupt handling.
 *
 * Disables local interrupts (or, when __ARCH_IRQ_EXIT_IRQS_DISABLED is
 * defined, asserts that they are already disabled), subtracts HARDIRQ_OFFSET
 * from the preempt count, and calls invoke_softirq() when in_interrupt() is
 * false and local_softirq_pending() is non-zero.
 */
void irq_exit(void)
{
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
local_irq_disable();
#else
lockdep_assert_irqs_disabled();
#endif
account_irq_exit_time(current);
preempt_count_sub(HARDIRQ_OFFSET);// removes the HARDIRQ part from the preempt count
// Per the article: softirqs are entered only when we are not already inside
// hardirq, softirq or NMI context, which prevents nested processing.
if (!in_interrupt() && local_softirq_pending())
invoke_softirq();// entry point into the softirq handling path
tick_irq_exit();
rcu_irq_exit();
trace_hardirq_exit(); /* must be last! */
}
/*
 * invoke_softirq - decide how the pending softirqs get processed.
 *
 * Returns at once when ksoftirqd_running() reports true for the pending mask.
 * With force_irqthreads set, only wakeup_softirqd() is called. Otherwise the
 * softirqs are run from here, on the stack chosen by
 * CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK.
 */
static inline void invoke_softirq(void)
{
	if (ksoftirqd_running(local_softirq_pending()))
		return;

	if (force_irqthreads) {
		wakeup_softirqd();
		return;
	}

#ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
	/*
	 * Already on the irq stack, which should be nearly empty at this
	 * point, so running softirqs on the current stack is safe.
	 */
	__do_softirq();
#else
	/*
	 * irq_exit() ran on the task stack, which may already be deep.
	 * Run softirqs on their own stack to avoid an overrun.
	 * NOTE(review): the article states this ends up in __do_softirq()
	 * as well; that is not visible from here.
	 */
	do_softirq_own_stack();
#endif
}
/*
 * __do_softirq - core softirq dispatcher (the key function of this article).
 *
 * Takes a snapshot of the pending softirq mask, clears the mask, enables
 * interrupts and calls ->action for every vector whose bit was set in the
 * snapshot, lowest bit first. Interrupts are then disabled again and the mask
 * re-read: if new softirqs are pending, the pass is repeated as long as the
 * time budget (MAX_SOFTIRQ_TIME), need_resched() and the restart budget
 * (MAX_SOFTIRQ_RESTART) allow; otherwise wakeup_softirqd() is called.
 *
 * PF_MEMALLOC is masked out of current->flags while handlers run and restored
 * from the saved flags before returning.
 */
asmlinkage __visible void __softirq_entry __do_softirq(void)
{
	unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
	unsigned long old_flags = current->flags;
	int max_restart = MAX_SOFTIRQ_RESTART;
	struct softirq_action *h;
	bool in_hardirq;
	__u32 pending;
	int softirq_bit;

	/*
	 * Mask out PF_MEMALLOC as the current task context is borrowed for the
	 * softirq. A softirq handled, such as network RX, might set PF_MEMALLOC
	 * again if the socket is related to swapping.
	 */
	current->flags &= ~PF_MEMALLOC;

	/* Snapshot of raised softirqs: one bit per softirq number. */
	pending = local_softirq_pending();
	account_irq_enter_time(current);

	/* Adds SOFTIRQ_OFFSET: from here on we count as softirq context. */
	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
	in_hardirq = lockdep_softirq_start();

restart:
	/* Reset the pending bitmask before enabling irqs */
	set_softirq_pending(0);

	local_irq_enable();

	h = softirq_vec;

	/*
	 * ffs() yields the 1-based position of the lowest set bit (0 when no
	 * bit is set), so each iteration advances h to the next raised vector
	 * and then shifts the handled bits out of the snapshot.
	 */
	while ((softirq_bit = ffs(pending))) {
		unsigned int vec_nr;
		int prev_count;

		h += softirq_bit - 1;

		vec_nr = h - softirq_vec;
		prev_count = preempt_count();

		kstat_incr_softirqs_this_cpu(vec_nr);

		trace_softirq_entry(vec_nr);
		h->action(h);	/* the callback installed via open_softirq() */
		trace_softirq_exit(vec_nr);

		/* A handler must leave the preempt count as it found it. */
		if (unlikely(prev_count != preempt_count())) {
			pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
			       vec_nr, softirq_to_name[vec_nr], h->action,
			       prev_count, preempt_count());
			preempt_count_set(prev_count);
		}
		h++;
		pending >>= softirq_bit;
	}

	if (__this_cpu_read(ksoftirqd) == current)
		rcu_softirq_qs();
	local_irq_disable();

	/* Softirqs raised while the handlers ran show up here. */
	pending = local_softirq_pending();
	if (pending) {
		if (time_before(jiffies, end) && !need_resched() &&
		    --max_restart)
			goto restart;

		/* Budget exhausted: leave the rest to wakeup_softirqd(). */
		wakeup_softirqd();
	}

	lockdep_softirq_end(in_hardirq);
	account_irq_exit_time(current);
	__local_bh_enable(SOFTIRQ_OFFSET);
	WARN_ON_ONCE(in_interrupt());
	current_restore_flags(old_flags, PF_MEMALLOC);
}