Reading notes about Generic Time Subsystem implementation on linux.
Author: Honggang Yang(Joseph) <[email protected]>
Kernel Version: Linux 3.1.1
==================================================================
REF:
http://kerneldox.com
Professional Linux Kernel Architecture
------------------------------------------------------------------------------------------------------------------------
Contents:
0. header
1. Overview
2. Working with Clock Sources
3. Clock Event Device
=============================================================
0. header
An overview of the building blocks employed to implement the timing
subsystem is given in Figure below. It gives a quick glance at what is
involved in timekeeping, and how the components interact with each other.
Figure: Overview of the components that build up the timing subsystem.
![](https://img.laitimes.com/img/_0nNw4CM6IyYiwiM6ICdiwiIml2ZuoFTVpXNzUzM2AjMyMTMfBzLcNjMvwVMxETMwIzLcRnbl1GajFGd0F2LcRXZu5ibkN3YukGavw1LcpDc0RHaiojIsJye.gif)
As you can see, the raw hardware sits at the very bottom. Every typical
system has several devices, usually implemented by clock chips, that
provide timing functionality and can serve as clocks. IA-32 and AMD64 have
a programmable interrupt timer(PIT, implemented by the 8253 chip) as a classical
clock source that has only a modest resolution and stability. CPU-local APICs
(advanced programmable interrupt controllers) provide much better
resolution and stability. They are suitable as high-resolution time sources,
whereas the PIT is only good enough for low-resolution timers.
Hardware naturally needs to be programmed by architecture-specific code,
but the clock source abstraction provides a generic interface to all hardware
clock chips. Essentially, read access to the current value of the running
counter provided by a clock chip is granted. Periodic events do not comply
with a free running counter very well, thus another abstraction is required.
Clock events are the foundation of periodic events. However, clock events
can be more powerful. Some time devices can provide events at arbitrary,
irregular time points. In contrast to periodic event devices, they are called
one-shot devices.
1. Overview
Figure: Overview of the generic time subsystem
Three mechanisms form the foundation of any time-related task in the kernel:
1> Clock Source ( struct clocksource) -- Each clock source provides a monotonically
increasing counter with Read Only access for the generic parts. The
accurateness of the clocksource varies depending on the capabilities of the
underlying hardware.
2> Clock Event Devices ( struct clock_event_device) -- Add the possibility of
equipping clocks with events that occur at a certain time in the future.
We also refer to such devices as clock event source for historical reasons.
3> Tick Devices( struct tick_device) -- Extended clock event sources to provide
a continuous stream of tick events that happen at regular time intervals.
The kernel distinguishes between two types of clocks:
1> Global Clock -- It is responsible for providing the periodic tick that is mainly
used to update the @jiffies value. In former versions of the kernel, this
type of clock was realized by the PIT on IA-32 systems.
2> Local Clock -- one local clock per CPU allows for performing process
accounting, profiling, and last but not least, high-resolution timers.
2. Object for Time Management
Clock Sources:
129
164 struct clocksource {
165
169 cycle_t (*read)(struct clocksource *cs);
170 cycle_t cycle_last;
171 cycle_t mask;
172 u32 mult;
173 u32 shift;
174 u64 max_idle_ns;
175
176 #ifdef CONFIG_ARCH_CLOCKSOURCE_DATA
177 struct arch_clocksource_data archdata;
178 #endif
179
180 const char *name;
181 struct list_head list;
182 int rating;
183 int (*enable)(struct clocksource *cs);
184 void (*disable)(struct clocksource *cs);
185 unsigned long flags;
186 void (*suspend)(struct clocksource *cs);
187 void (*resume)(struct clocksource *cs);
188
189 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
190
191 struct list_head wd_list;
192 cycle_t cs_last;
193 cycle_t wd_last;
194 #endif
195 } ____cacheline_aligned;
164
174 static struct clocksource *curr_clocksource;
175 static LIST_HEAD(clocksource_list);
176 static DEFINE_MUTEX(clocksource_mutex);
177 static char override_name[32];
178 static int finished_booting;
2. Working with Clock Sources
1> Register a clock source.
Before using a clock, you must register a clock source with the kernel.
The function clocksource_register() is responsible for this. The source
is only added to the global @clocksource_list, which sorts all available
clock sources by their rating.
2> Read the clock.
To read the clock, the kernel provides the following functions:
getnstimeofday() // in kernel/time/timekeeping.c
Details of the functions mentioned above:
Call Tree:
clocksource_register
clocksource_max_deferment
clocksource_enqueue
clocksource_enqueue_watchdog
clocksource_select
timekeeping_notify // Don't go too far, this is outside our topic here
stop_machine(change_clocksource...)
tick_clock_notify
timekeeping_get_ns
clock->read(clock)
698
704 int clocksource_register(struct clocksource *cs)
705 {
706
707 cs->max_idle_ns = clocksource_max_deferment(cs);
708
709 mutex_lock(&clocksource_mutex);
710 clocksource_enqueue(cs);
711 clocksource_enqueue_watchdog(cs);
712 clocksource_select();
713 mutex_unlock(&clocksource_mutex);
714 return 0;
715 }
496
501 static u64 clocksource_max_deferment(struct clocksource *cs)
502 {
503 u64 max_nsecs, max_cycles;
504
505
519 max_cycles = 1ULL << (63 - (ilog2(cs->mult) + 1));
520
521
525 max_cycles = min_t(u64, max_cycles, (u64) cs->mask);
526 max_nsecs = clocksource_cyc2ns(max_cycles, cs->mult, cs->shift);
527
528
534 return max_nsecs - ( max_nsecs >> 3 );
535 }
536
261
268 static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift)
269 {
270 return ((u64) cycles * mult) >> shift;
271 }
616
619 static void clocksource_enqueue(struct clocksource *cs)
620 {
621 struct list_head *entry = &clocksource_list;
622 struct clocksource *tmp;
623
624 list_for_each_entry(tmp, &clocksource_list, list)
625
626 if (tmp->rating >= cs->rating)
627 entry = &tmp->list;
628 list_add(&cs->list, entry);
629 }
366 static void clocksource_enqueue_watchdog(struct clocksource *cs)
367 {
368 unsigned long flags;
369
370 spin_lock_irqsave(&watchdog_lock, flags);
371 if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
372
373 list_add(&cs->wd_list, &watchdog_list);
374 cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
375 } else {
376
377 if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
378 cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
379
380 if (!watchdog || cs->rating > watchdog->rating) {
381 watchdog = cs;
382
383 clocksource_reset_watchdog();
384 }
385 }
386
387 clocksource_start_watchdog();
388 spin_unlock_irqrestore(&watchdog_lock, flags);
389 }
539
547 static void clocksource_select(void)
548 {
549 struct clocksource *best, *cs;
550
551 if (!finished_booting || list_empty(&clocksource_list))
552 return;
553
554 best = list_first_entry(&clocksource_list, struct clocksource, list);
555
556 list_for_each_entry(cs, &clocksource_list, list) {
557 if (strcmp(cs->name, override_name) != 0)
558 continue;
559
564 if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
565 tick_oneshot_mode_active()) {
566
567 printk(KERN_WARNING "Override clocksource %s is not "
568 "HRT compatible. Cannot switch while in "
569 "HRT/NOHZ mode\n", cs->name);
570 override_name[0] = 0;
571 } else
572
573 best = cs;
574 break;
575 }
576 if (curr_clocksource != best) {
577 printk(KERN_INFO "Switching to clocksource %s\n", best->name);
578 curr_clocksource = best;
579 timekeeping_notify(curr_clocksource);
580 }
581 }
447
454 void timekeeping_notify(struct clocksource *clock)
455 {
456 if (timekeeper.clock == clock)
457 return;
458 stop_machine(change_clocksource, clock, NULL);
459 tick_clock_notify();
460 }
211
217 void getnstimeofday(struct timespec *ts)
218 {
219 unsigned long seq;
220 s64 nsecs;
221
222 WARN_ON(timekeeping_suspended);
223
224 do {
225 seq = read_seqbegin(&xtime_lock);
226
227 *ts = xtime;
228 nsecs = timekeeping_get_ns();
229
230
231 nsecs += arch_gettimeoffset();
232
233 } while (read_seqretry(&xtime_lock, seq));
234
235 timespec_add_ns(ts, nsecs);
236 }
237
238 EXPORT_SYMBOL(getnstimeofday);
104
105 static inline s64 timekeeping_get_ns(void)
106 {
107 cycle_t cycle_now, cycle_delta;
108 struct clocksource *clock;
109
110
111 clock = timekeeper.clock;
112 cycle_now = clock->read(clock);
113
114
115 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
116
117
118 return clocksource_cyc2ns(cycle_delta, timekeeper.mult,
119 timekeeper.shift);
120 }
The following are the APIs of the clocksource that you can use in your modules:
EXPORT_SYMBOL_GPL(timecounter_init); +
EXPORT_SYMBOL_GPL(timecounter_read); +
EXPORT_SYMBOL_GPL(timecounter_cyc2time); +
EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale); +
EXPORT_SYMBOL_GPL(__clocksource_register_scale);
EXPORT_SYMBOL(clocksource_register); +
EXPORT_SYMBOL(clocksource_change_rating); +
EXPORT_SYMBOL(clocksource_unregister); +
Now let's get into the details of them.
clocksource_register() has been talked above.
clocksource_unregister:
737
740 void clocksource_unregister(struct clocksource *cs)
741 {
742 mutex_lock(&clocksource_mutex);
743 clocksource_dequeue_watchdog(cs);
744 list_del(&cs->list);
745 clocksource_select();
746 mutex_unlock(&clocksource_mutex);
747 }
748 EXPORT_SYMBOL(clocksource_unregister);
timecounter_init:
29
43 struct cyclecounter {
44 cycle_t (*read)(const struct cyclecounter *cc);
45 cycle_t mask;
46 u32 mult;
47 u32 shift;
48 };
50
66 struct timecounter {
67 const struct cyclecounter *cc;
68 cycle_t cycle_last;
69 u64 nsec; ///?????
70 };
34 void timecounter_init(struct timecounter *tc,
35 const struct cyclecounter *cc,
36 u64 start_tstamp)
37 {
38 tc->cc = cc;
39 tc->cycle_last = cc->read(cc);
40 tc->nsec = start_tstamp;
41 }
42 EXPORT_SYMBOL_GPL(timecounter_init);
75 u64 timecounter_read(struct timecounter *tc)
76 {
77 u64 nsec;
78
79
80 nsec = timecounter_read_delta(tc);
81 nsec += tc->nsec;
82 tc->nsec = nsec;
83
84 return nsec;
85 }
86 EXPORT_SYMBOL_GPL(timecounter_read);
44
55 static u64 timecounter_read_delta(struct timecounter *tc)
56 {
57 cycle_t cycle_now, cycle_delta;
58 u64 ns_offset;
59
60
61 cycle_now = tc->cc->read(tc->cc);
62
63
64 cycle_delta = (cycle_now - tc->cycle_last) & tc->cc->mask;
65
66
67 ns_offset = cyclecounter_cyc2ns(tc->cc, cycle_delta);
68
69
70 tc->cycle_last = cycle_now;
71
72 return ns_offset;
73 }
88 u64 timecounter_cyc2time(struct timecounter *tc,
89 cycle_t cycle_tstamp)
90 {
91 u64 cycle_delta = (cycle_tstamp - tc->cycle_last) & tc->cc->mask;
92 u64 nsec;
93
94
99 if (cycle_delta > tc->cc->mask / 2) {
100 cycle_delta = (tc->cycle_last - cycle_tstamp) & tc->cc->mask;
101 nsec = tc->nsec - cyclecounter_cyc2ns(tc->cc, cycle_delta);
102 } else {
103 nsec = cyclecounter_cyc2ns(tc->cc, cycle_delta) + tc->nsec;
104 }
105
106 return nsec;
107 }
108 EXPORT_SYMBOL_GPL(timecounter_cyc2time);
109
call tree:
clocksource_change_rating
__clocksource_change_rating
clocksource_enqueue
clocksource_select
726
729 void clocksource_change_rating(struct clocksource *cs, int rating)
730 {
731 mutex_lock(&clocksource_mutex);
732 __clocksource_change_rating(cs, rating);
733 mutex_unlock(&clocksource_mutex);
734 }
735 EXPORT_SYMBOL(clocksource_change_rating);
631
642 void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
643 {
644 u64 sec;
645
646
656 sec = (cs->mask - (cs->mask >> 3 ));
657 do_div(sec, freq);
658 do_div(sec, scale);
659 if (!sec)
660 sec = 1;
661 else if (sec > 600 && cs->mask > UINT_MAX)
662 sec = 600;
663
664 clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
665 NSEC_PER_SEC / scale, sec * scale);
666 cs->max_idle_ns = clocksource_max_deferment(cs);
667 }
int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
{
__clocksource_updatefreq_scale(cs, scale, freq);
mutex_lock(&clocksource_mutex);
clocksource_enqueue(cs);
clocksource_enqueue_watchdog(cs);
clocksource_select();
mutex_unlock(&clocksource_mutex);
return 0;
}
EXPORT_SYMBOL_GPL(__clocksource_register_scale);
---------------------------------------
Initialization of clocksource
Call Tree:
start_kernel
timekeeping_init
read_persistent_clock
clocksource_default_clock
time_init
...
late_time_init
hpet_time_init
hpet_enable
...
hpet_clocksource_register
hpet_restart_counter
clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq)
__clocksource_register_scale(cs, 1, hz)
hpet_legacy_clockevent_register
clockevents_register_device
...
//setup_pit_timer
clockevent_i8253_init
clockevents_config_and_register(&i8253_clockevent ...)
clockevents_config
clockevents_register_device
setup_default_timer_irq
559
562 void __init timekeeping_init(void)
563 {
564 struct clocksource *clock;
565 unsigned long flags;
566 struct timespec now, boot;
567
568 read_persistent_clock(&now);
569 read_boot_clock(&boot);
570
571 write_seqlock_irqsave(&xtime_lock, flags);
572
573 ntp_init();
574
575 clock = clocksource_default_clock();
576 if (clock->enable)
577 clock->enable(clock);
578 timekeeper_setup_internals(clock);
579
580 xtime.tv_sec = now.tv_sec;
581 xtime.tv_nsec = now.tv_nsec;
582 raw_time.tv_sec = 0;
583 raw_time.tv_nsec = 0;
584 if (boot.tv_sec == 0 && boot.tv_nsec == 0) {
585 boot.tv_sec = xtime.tv_sec;
586 boot.tv_nsec = xtime.tv_nsec;
587 }
588 set_normalized_timespec(&wall_to_monotonic,
589 -boot.tv_sec, -boot.tv_nsec);
590 total_sleep_time.tv_sec = 0;
591 total_sleep_time.tv_nsec = 0;
592 write_sequnlock_irqrestore(&xtime_lock, flags);
593 }
594
//arch/x86/kernel/x86_init.c
98 struct x86_platform_ops x86_platform = {
99 .calibrate_tsc = native_calibrate_tsc,
100 .get_wallclock = mach_get_cmos_time,
101 .set_wallclock = mach_set_rtc_mmss,
102 .iommu_shutdown = iommu_shutdown_noop,
103 .is_untracked_pat_range = is_ISA_range,
104 .nmi_init = default_nmi_init,
105 .i8042_detect = default_i8042_detect
106 };
185
186 void read_persistent_clock(struct timespec *ts)
187 {
188 unsigned long retval;
189
190 retval = x86_platform.get_wallclock();
191
192 ts->tv_sec = retval;
193 ts->tv_nsec = 0;
194 }
94 struct clocksource * __init __weak clocksource_default_clock(void)
95 {
96 return &clocksource_jiffies;
97 }
30
40 #define NSEC_PER_JIFFY ((u32)((((u64)NSEC_PER_SEC)<<8)/ACTHZ))
41
42
54 #define JIFFIES_SHIFT 8
55
56 static cycle_t jiffies_read(struct clocksource *cs)
57 {
58 return (cycle_t) jiffies;
59 }
60
61 struct clocksource clocksource_jiffies = {
62 .name = "jiffies",
63 .rating = 1,
64 .read = jiffies_read,
65 .mask = 0xffffffff,
66 .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT,
67 .shift = JIFFIES_SHIFT,
68 };
87 static int __init init_jiffies_clocksource(void)
88 {
89 return clocksource_register(&clocksource_jiffies);
90 }
91
92 core_initcall(init_jiffies_clocksource);
The @clocksource_jiffies above is a simple example of how to implement
your clocksource.
71 static struct irqaction irq0 = {
72 .handler = timer_interrupt,
73 .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER,
74 .name = "timer"
75 };
76
77 void __init setup_default_timer_irq(void)
78 {
79 setup_irq(0, &irq0);
80 }
81
82
83 void __init hpet_time_init(void)
84 {
85 if (!hpet_enable())
86 setup_pit_timer();
87 setup_default_timer_irq();
88 }
89
90 static __init void x86_late_time_init(void)
91 {
92 x86_init.timers.timer_init();
93 tsc_init();
94 }
95
96
100 void __init time_init(void)
101 {
102 late_time_init = x86_late_time_init;
103 }
31
35 struct x86_init_ops x86_init __initdata = {//arch/x86/kernel/x86_init.c
...
73 .timers = {
74 .setup_percpu_clockev = setup_boot_APIC_clock,
75 .tsc_pre_init = x86_init_noop,
76 .timer_init = hpet_time_init,
77 .wallclock_init = x86_init_noop,
78 },
...
};
787
790 int __init hpet_enable(void)
791 {
792 unsigned long hpet_period;
793 unsigned int id;
794 u64 freq;
795 int i;
796
797 if (!is_hpet_capable())
798 return 0;
799
800 hpet_set_mapping();
801
802
805 hpet_period = hpet_readl(HPET_PERIOD);
806
807
820 for (i = 0; hpet_readl(HPET_CFG) == 0xFFFFFFFF; i++) {
821 if (i == 1000) {
822 printk(KERN_WARNING
823 "HPET config register value = 0xFFFFFFFF. "
824 "Disabling HPET\n");
825 goto out_nohpet;
826 }
827 }
828
829 if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD)
830 goto out_nohpet;
831
832
836 freq = FSEC_PER_SEC;
837 do_div(freq, hpet_period);
838 hpet_freq = freq;
839
840
844 id = hpet_readl(HPET_ID);
845 hpet_print_config();
846
847 #ifdef CONFIG_HPET_EMULATE_RTC
848
852 if (!(id & HPET_ID_NUMBER))
853 goto out_nohpet;
854 #endif
855
856 if (hpet_clocksource_register())
857 goto out_nohpet;
858
859 if (id & HPET_ID_LEGSUP) {
860 hpet_legacy_clockevent_register();
861 return 1;
862 }
863 return 0;
864
865 out_nohpet:
866 hpet_clear_mapping();
867 hpet_address = 0;
868 return 0;
869 }
870
754 static int hpet_clocksource_register(void)
755 {
756 u64 start, now;
757 cycle_t t1;
758
759
760 hpet_restart_counter();
761
762
763 t1 = hpet_readl(HPET_COUNTER);
764 rdtscll(start);
765
766
772 do {
773 rep_nop();
774 rdtscll(now);
775 } while ((now - start) < 200000UL);
776
777 if (t1 == hpet_readl(HPET_COUNTER)) {
778 printk(KERN_WARNING
779 "HPET counter not counting. HPET disabled\n");
780 return -ENODEV;
781 }
782
783 clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq);
784 return 0;
785 }
20 void __init setup_pit_timer(void)
21 {
22 clockevent_i8253_init(true);
23 global_clock_event = &i8253_clockevent;
24 }
--------------
3. Clock Event Device
-------------
Clock event devices allow for registering an event that is going to happen
at a defined point of time in the future. In comparison to a full-blown timer
implementation, however, only a single event can be stored.
The key elements of every clock_event_device are set_next_event because
it allows for setting the time at which the event is going to take place, and
event_handler, which is called when the event actually happens.
57
80 struct clock_event_device {
81 void (*event_handler)(struct clock_event_device *);
82 int (*set_next_event)(unsigned long evt,
83 struct clock_event_device *);
84 ktime_t next_event;
85 u64 max_delta_ns;
86 u64 min_delta_ns;
87 u32 mult;
88 u32 shift;
89 enum clock_event_mode mode;
90 unsigned int features;
91 unsigned long retries;
92
93 void (*broadcast)(const struct cpumask *mask);
94 void (*set_mode)(enum clock_event_mode mode,
95 struct clock_event_device *);
96 unsigned long min_delta_ticks;
97 unsigned long max_delta_ticks;
98
99 const char *name;
100 int rating;
101 int irq;
102 const struct cpumask *cpumask;
103 struct list_head list;
104 } ____cacheline_aligned;
24
25 static LIST_HEAD(clockevent_devices);
26 static LIST_HEAD(clockevents_released);
27
28
29 static RAW_NOTIFIER_HEAD(clockevents_chain);
30
31
32 static DEFINE_RAW_SPINLOCK(clockevents_lock);
APIs:
EXPORT_SYMBOL_GPL(clockevent_delta2ns);
EXPORT_SYMBOL_GPL(clockevents_register_device);
EXPORT_SYMBOL_GPL(clockevents_notify);
34
41 u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt)
42 {
43 u64 clc = (u64) latch << evt->shift;
44
45 if (unlikely(!evt->mult)) {
46 evt->mult = 1;
47 WARN_ON(1);
48 }
49
50 do_div(clc, evt->mult);
51 if (clc < 1000)
52 clc = 1000;
53 if (clc > KTIME_MAX)
54 clc = KTIME_MAX;
55
56 return clc;
57 }
58 EXPORT_SYMBOL_GPL(clockevent_delta2ns);
Call Tree:
clockevents_register_device
clockevents_do_notify
raw_notifier_call_chain
clockevents_notify_released
clockevents_do_notify
raw_notifier_call_chain
176
180 void clockevents_register_device(struct clock_event_device *dev)
181 {
182 unsigned long flags;
183
184 BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
185 if (!dev->cpumask) {
186 WARN_ON(num_possible_cpus() > 1);
187 dev->cpumask = cpumask_of(smp_processor_id());
188 }
189
190 raw_spin_lock_irqsave(&clockevents_lock, flags);
191
192 list_add(&dev->list, &clockevent_devices);
193 clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev);
194 clockevents_notify_released();
195
196 raw_spin_unlock_irqrestore(&clockevents_lock, flags);
197 }
198 EXPORT_SYMBOL_GPL(clockevents_register_device);
163 static void clockevents_notify_released(void)
164 {
165 struct clock_event_device *dev;
166
167 while (!list_empty(&clockevents_released)) {
168 dev = list_entry(clockevents_released.next,
169 struct clock_event_device, list);
170 list_del(&dev->list);
171 list_add(&dev->list, &clockevent_devices);
172 clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev);
173 }
174 }
302
305 void clockevents_notify(unsigned long reason, void *arg)
306 {
307 struct clock_event_device *dev, *tmp;
308 unsigned long flags;
309 int cpu;
310
311 raw_spin_lock_irqsave(&clockevents_lock, flags);
312 clockevents_do_notify(reason, arg);
313
314 switch (reason) {
315 case CLOCK_EVT_NOTIFY_CPU_DEAD:
316
320 list_for_each_entry_safe(dev, tmp, &clockevents_released, list)
321 list_del(&dev->list);
322
325 cpu = *((int *)arg);
326 list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) {
327 if (cpumask_test_cpu(cpu, dev->cpumask) &&
328 cpumask_weight(dev->cpumask) == 1 &&
329 !tick_is_broadcast_device(dev)) {
330 BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
331 list_del(&dev->list);
332 }
333 }
334 break;
335 default:
336 break;
337 }
338 raw_spin_unlock_irqrestore(&clockevents_lock, flags);
339 }
340 EXPORT_SYMBOL_GPL(clockevents_notify);
Notify requests issued through clockevents_do_notify() are finally handled by
tick_notify() in kernel/time/tick-common.c
Call Tree:
start_kernel
tick_init
clockevents_register_notifier(&tick_notifier)
407 static struct notifier_block tick_notifier = {
408 .notifier_call = tick_notify,
409 };
410
411
416 void __init tick_init(void)
417 {
418 clockevents_register_notifier(&tick_notifier);
419 }
359
362 static int tick_notify(struct notifier_block *nb, unsigned long reason,
363 void *dev)
364 {
365 switch (reason) {
366
367 case CLOCK_EVT_NOTIFY_ADD:
368 return tick_check_new_device(dev);
369
370 case CLOCK_EVT_NOTIFY_BROADCAST_ON:
371 case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
372 case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
373 tick_broadcast_on_off(reason, dev);
374 break;
375
376 case CLOCK_EVT_NOTIFY_BROADCAST_ENTER:
377 case CLOCK_EVT_NOTIFY_BROADCAST_EXIT:
378 tick_broadcast_oneshot_control(reason);
379 break;
380
381 case CLOCK_EVT_NOTIFY_CPU_DYING:
382 tick_handover_do_timer(dev);
383 break;
384
385 case CLOCK_EVT_NOTIFY_CPU_DEAD:
386 tick_shutdown_broadcast_oneshot(dev);
387 tick_shutdown_broadcast(dev);
388 tick_shutdown(dev);
389 break;
390
391 case CLOCK_EVT_NOTIFY_SUSPEND:
392 tick_suspend();
393 tick_suspend_broadcast();
394 break;
395
396 case CLOCK_EVT_NOTIFY_RESUME:
397 tick_resume();
398 break;
399
400 default:
401 break;
402 }
403
404 return NOTIFY_OK;
405 }
406
Before going further, a new structure is introduced here.
13 enum tick_device_mode {
14 TICKDEV_MODE_PERIODIC,
15 TICKDEV_MODE_ONESHOT,
16 };
17
18 struct tick_device {
19 struct clock_event_device *evtdev;
20 enum tick_device_mode mode;
21 };
A tick_device is just a wrapper around struct clock_event_device with
additional field that specifies which mode the device is in. This can either be
periodic or one-shot. The distinction will be important when tickless systems
are considered. A tick device can be seen as a mechanism that provides a
continuous stream of tick events here. These form the basis for the scheduler,
the classical timer wheel, and related components of the kernel.
Note that the kernel automatically creates a tick device when a new clock
event device is registered (see tick_check_new_device() for details).
Some global variables are defined.
- tick_cpu_device is a per-CPU list containing one instance of struct tick_device
for each CPU in the system.
- tick_next_period specifies the time( in nanoseconds) when the next global tick event will happen.
- tick_do_timer_cpu contains the CPU number whose tick device assumes the role of the
global tick device.
- tick_period stores the interval between ticks in nanoseconds.
Let us get back to the tick_notify() analysis.
Call Tree:
tick_check_new_device// Be called by tick_notify() when new clock event device added.
205
208 static int tick_check_new_device(struct clock_event_device *newdev)
209 {
210 struct clock_event_device *curdev;
211 struct tick_device *td;
212 int cpu, ret = NOTIFY_OK;
213 unsigned long flags;
214
215 raw_spin_lock_irqsave(&tick_device_lock, flags);
216
217 cpu = smp_processor_id();
218 if (!cpumask_test_cpu(cpu, newdev->cpumask))
219 goto out_bc;
220
221 td = &per_cpu(tick_cpu_device, cpu);
222 curdev = td->evtdev;
223
224
225 if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu))) {
226
227
231 if (!irq_can_set_affinity(newdev->irq))
232 goto out_bc;
233
234
238 if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu)))
239 goto out_bc;
240 }
241
242
246 if (curdev) {
247
250 if ((curdev->features & CLOCK_EVT_FEAT_ONESHOT) &&
251 !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
252 goto out_bc;
253
256 if (curdev->rating >= newdev->rating)
257 goto out_bc;
258 }
259
260
265 if (tick_is_broadcast_device(curdev)) {
266 clockevents_shutdown(curdev);
267 curdev = NULL;
268 }
269 clockevents_exchange_device(curdev, newdev);
270 tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
271 if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
272 tick_oneshot_notify();
273
274 raw_spin_unlock_irqrestore(&tick_device_lock, flags);
275 return NOTIFY_STOP;
276
277 out_bc:
278
281 if (tick_check_broadcast_device(newdev))
282 ret = NOTIFY_STOP;
283
284 raw_spin_unlock_irqrestore(&tick_device_lock, flags);
285
286 return ret;
287 }
288
64
67 int tick_check_broadcast_device(struct clock_event_device *dev)
68 {
69 if ((tick_broadcast_device.evtdev &&
70 tick_broadcast_device.evtdev->rating >= dev->rating) ||
71 (dev->features & CLOCK_EVT_FEAT_C3STOP))
72 return 0;
73
74 clockevents_exchange_device(NULL, dev);
75 tick_broadcast_device.evtdev = dev;
76 if (!cpumask_empty(tick_get_broadcast_mask()))
77 tick_broadcast_start_periodic(dev);
78 return 1;
79 }
271
278 void clockevents_exchange_device(struct clock_event_device *old,
279 struct clock_event_device *new)
280 {
281 unsigned long flags;
282
283 local_irq_save(flags);
284
288 if (old) {
289 clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED);
290 list_del(&old->list);
291 list_add(&old->list, &clockevents_released);
292 }
293
294 if (new) {
295 BUG_ON(new->mode != CLOCK_EVT_MODE_UNUSED);
296 clockevents_shutdown(new);
297 }
298 local_irq_restore(flags);
299 }
Call Tree:
tick_notify
tick_check_new_device
tick_setup_device
147
150 static void tick_setup_device(struct tick_device *td,
151 struct clock_event_device *newdev, int cpu,
152 const struct cpumask *cpumask)
153 {
154 ktime_t next_event;
155 void (*handler)(struct clock_event_device *) = NULL;
156
157
160 if (!td->evtdev) {
161
165 if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
166 tick_do_timer_cpu = cpu;
167 tick_next_period = ktime_get();
168 tick_period = ktime_set(0, NSEC_PER_SEC / HZ);
169 }
170
171
174 td->mode = TICKDEV_MODE_PERIODIC;
175 } else {
176 handler = td->evtdev->event_handler;
177 next_event = td->evtdev->next_event;
178 td->evtdev->event_handler = clockevents_handle_noop;
179 }
180
181 td->evtdev = newdev;
182
183
187 if (!cpumask_equal(newdev->cpumask, cpumask))
188 irq_set_affinity(newdev->irq, cpumask);
189
190
196 if (tick_device_uses_broadcast(newdev, cpu))
197 return;
198
199 if (td->mode == TICKDEV_MODE_PERIODIC)
200 tick_setup_periodic(newdev, 0);
201 else
202 tick_setup_oneshot(newdev, handler, next_event);
203 }
204
Call Tree:
tick_setup_periodic // called by tick_setup_device()
tick_set_periodic_handler
tick_handle_periodic
or tick_handle_periodic_broadcast
--y-->
|
| no
|--->
|--->
114
117 void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
118 {
119 tick_set_periodic_handler(dev, broadcast);
120
121
122 if (!tick_device_is_functional(dev))
123 return;
124
125 if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
126 !tick_broadcast_oneshot_active()) {
127 clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC);
128 } else {
129 unsigned long seq;
130 ktime_t next;
131
132 do {
133 seq = read_seqbegin(&xtime_lock);
134 next = tick_next_period;
135 } while (read_seqretry(&xtime_lock, seq));
136
137 clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
138
139 for (;;) {
140 if (!clockevents_program_event(dev, next, ktime_get()))
141 return;
142 next = ktime_add(next, tick_period);
143 }
144 }
145 }
281
284 void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
285 {
286 if (!broadcast)
287 dev->event_handler = tick_handle_periodic;
288 else
289 dev->event_handler = tick_handle_periodic_broadcast;
290 }
Call Tree:
tick_handle_periodic
tick_periodic
--> do_timer
update_process_times
profile_tick
--> clockevents_program_event
79
82 void tick_handle_periodic(struct clock_event_device *dev)
83 {
84 int cpu = smp_processor_id();
85 ktime_t next;
86
87 tick_periodic(cpu);
88
89 if (dev->mode != CLOCK_EVT_MODE_ONESHOT)
90 return;
91
95 next = ktime_add(dev->next_event, tick_period);
96 for (;;) {
97 if (!clockevents_program_event(dev, next, ktime_get()))
98 return;
99
108 if (timekeeping_valid_for_hres())
109 tick_periodic(cpu);
110 next = ktime_add(next, tick_period);
111 }
112 }
Call Tree:
tick_handle_periodic_broadcast
tick_do_periodic_broadcast
tick_do_broadcast
--> clockevents_program_event
172
175 static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
176 {
177 ktime_t next;
178
179 tick_do_periodic_broadcast();
180
181
184 if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
185 return;
186
187
194 for (next = dev->next_event; ;) {
195 next = ktime_add(next, tick_period);
196
197 if (!clockevents_program_event(dev, next, ktime_get()))
198 return;
199 tick_do_periodic_broadcast();
200 }
201 }
157
161 static void tick_do_periodic_broadcast(void)
162 {
163 raw_spin_lock(&tick_broadcast_lock);
164
165 cpumask_and(to_cpumask(tmpmask),
166 cpu_online_mask, tick_get_broadcast_mask());
167 tick_do_broadcast(to_cpumask(tmpmask));
168
169 raw_spin_unlock(&tick_broadcast_lock);
170 }
128
131 static void tick_do_broadcast(struct cpumask *mask)
132 {
133 int cpu = smp_processor_id();
134 struct tick_device *td;
135
136
139 if (cpumask_test_cpu(cpu, mask)) {
140 cpumask_clear_cpu(cpu, mask);
141 td = &per_cpu(tick_cpu_device, cpu);
142 td->evtdev->event_handler(td->evtdev);
143 }
144
145 if (!cpumask_empty(mask)) {
146
152 td = &per_cpu(tick_cpu_device, cpumask_first(mask));
153 td->evtdev->broadcast(mask);
154 }
155 }
Call Tree:
tick_setup_oneshot
114
117 void tick_setup_oneshot(struct clock_event_device *newdev,
118 void (*handler)(struct clock_event_device *),
119 ktime_t next_event)
120 {
121 newdev->event_handler = handler;
122 clockevents_set_mode(newdev, CLOCK_EVT_MODE_ONESHOT);
123 tick_dev_program_event(newdev, next_event, 1);
124 }