在内核中针对cpu的操作,比如arm_cpuidle_init、arm_cpuidle_suspend、boot_secondary、secondary_start_kernel、op_cpu_disable、op_cpu_kill、cpu_die、smp_cpu_setup、smp_prepare_cpus等,最终都会回落到对cpu_ops的调用。
 cpu_ops将针对底层cpu的操作抽象为一系列回调函数,以统一的形式向上层提供API。
 cpu_psci_ops作为cpu_ops的一个特殊实现,将cpu_ops关联到PSCI的psci_ops。
 psci_ops的函数在PSCI Firmware中实现,提供一系列基于Function ID的调用。
 这种分层思想将内核通用cpu_operations和硬件相关部分分隔开。
 cpu_operations及其应用
 首先分析一下cpu_operations这个结构体:
    | struct cpu_operations {const char    *name;
 int        (*cpu_init)(unsigned int);  读取必要的数据准备初始化。
 int        (*cpu_prepare)(unsigned int);  启动前准备工作
 int        (*cpu_boot)(unsigned int);  启动一个CPU
 void        (*cpu_postboot)(void);  执行boot后的清理工作
 #ifdef CONFIG_HOTPLUG_CPU
 int        (*cpu_disable)(unsigned int cpu);  关闭CPU之前的准备工作
 void        (*cpu_die)(unsigned int cpu);  关闭CPU
 int        (*cpu_kill)(unsigned int cpu);  确认是否关闭
 #endif
 #ifdef CONFIG_CPU_IDLE
 int        (*cpu_init_idle)(unsigned int);  读取CPU idle状态的参数
 int        (*cpu_suspend)(unsigned long);  suspend一个CPU,并且保存上下文
 #endif
 };
 | 
 cpu_init
    | static int __init smp_cpu_setup(int cpu){
 if (cpu_read_ops(cpu))
 return -ENODEV;
     if (cpu_ops[cpu]->cpu_init(cpu))return -ENODEV;
     set_cpu_possible(cpu, true);     return 0;}
 | 
 获取指定cpu的cpu_ops,执行cpu_init回调函数进行初始化。并将此cpu设置为possible。
 cpu_prepare
    | void __init smp_prepare_cpus(unsigned int max_cpus){
 int err;
 unsigned int cpu, ncores = num_possible_cpus();
     init_cpu_topology();  填充cpu_topology结构体数组     smp_store_cpu_info(smp_processor_id());     /** are we trying to boot more cores than exist?
 */
 if (max_cpus > ncores)  不能超过possible cpu数目
 max_cpus = ncores;
     /* Don‘t bother if we‘re effectively UP */if (max_cpus <= 1)
 return;
     /** Initialise the present map (which describes the set of CPUs
 * actually populated at the present time) and release the
 * secondaries from the bootloader.
 *
 * Make sure we online at most (max_cpus - 1) additional CPUs.
 */
 max_cpus--;
 for_each_possible_cpu(cpu) {
 if (max_cpus == 0)
 break;
         if (cpu == smp_processor_id())continue;
         if (!cpu_ops[cpu])continue;
         err = cpu_ops[cpu]->cpu_prepare(cpu);  执行.cpu_prepare回调函数,将指定cpu设置为present。if (err)
 continue;
         set_cpu_present(cpu, true);max_cpus--;
 }
 }
 | 
 cpu_boot
    | static int boot_secondary(unsigned int cpu, struct task_struct *idle){
 if (cpu_ops[cpu]->cpu_boot)
 return cpu_ops[cpu]->cpu_boot(cpu);
     return -EOPNOTSUPP;}
 | 
 cpu_postboot
    | asmlinkage void secondary_start_kernel(void){
 struct mm_struct *mm = &init_mm;
 unsigned int cpu = smp_processor_id();
     /** All kernel threads share the same mm context; grab a
 * reference and switch to it.
 */
 atomic_inc(&mm->mm_count);
 current->active_mm = mm;
     set_my_cpu_offset(per_cpu_offset(smp_processor_id()));     /** TTBR0 is only used for the identity mapping at this stage. Make it
 * point to zero page to avoid speculatively fetching new entries.
 */
 cpu_set_reserved_ttbr0();
 local_flush_tlb_all();
 cpu_set_default_tcr_t0sz();
     preempt_disable();trace_hardirqs_off();
     /** If the system has established the capabilities, make sure
 * this CPU ticks all of those. If it doesn‘t, the CPU will
 * fail to come online.
 */
 verify_local_cpu_capabilities();
     if (cpu_ops[cpu]->cpu_postboot)cpu_ops[cpu]->cpu_postboot();
     /** Log the CPU info before it is marked online and might get read.
 */
 cpuinfo_store_cpu();
     /** Enable GIC and timers.
 */
 notify_cpu_starting(cpu);
     smp_store_cpu_info(cpu);     /** OK, now it‘s safe to let the boot CPU continue.  Wait for
 * the CPU migration code to notice that the CPU is online
 * before we continue.
 */
 pr_info("CPU%u: Booted secondary processor [%08x]\n",
 cpu, read_cpuid_id());
 set_cpu_online(cpu, true);
 complete(&cpu_running);
     local_dbg_enable();local_irq_enable();
 local_async_enable();
     /** OK, it‘s off to the idle thread for us
 */
 cpu_startup_entry(CPUHP_ONLINE);
 }
 | 
 cpu_disable
    | static int op_cpu_disable(unsigned int cpu){
 /*
 * If we don‘t have a cpu_die method, abort before we reach the point
 * of no return. CPU0 may not have an cpu_ops, so test for it.
 */
 if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_die)
 return -EOPNOTSUPP;
     /** We may need to abort a hot unplug for some other mechanism-specific
 * reason.
 */
 if (cpu_ops[cpu]->cpu_disable)
 return cpu_ops[cpu]->cpu_disable(cpu);
     return 0;}
 | 
 cpu_die
    | void cpu_die(void){
 unsigned int cpu = smp_processor_id();
     idle_task_exit();     local_irq_disable();     /* Tell __cpu_die() that this CPU is now safe to dispose of */(void)cpu_report_death();
     /** Actually shutdown the CPU. This must never fail. The specific hotplug
 * mechanism must perform all required cache maintenance to ensure that
 * no dirty lines are lost in the process of shutting down the CPU.
 */
 cpu_ops[cpu]->cpu_die(cpu);
     BUG();}
 | 
 cpu_kill
    | static int op_cpu_kill(unsigned int cpu){
 /*
 * If we have no means of synchronising with the dying CPU, then assume
 * that it is really dead. We can only wait for an arbitrary length of
 * time and hope that it‘s dead, so let‘s skip the wait and just hope.
 */
 if (!cpu_ops[cpu]->cpu_kill)
 return 0;
     return cpu_ops[cpu]->cpu_kill(cpu);}
 | 
 cpu_init_idle
    | int __init arm_cpuidle_init(unsigned int cpu){
 int ret = -EOPNOTSUPP;
     if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_init_idle)ret = cpu_ops[cpu]->cpu_init_idle(cpu);
     return ret;}
 | 
 cpu_suspend
    | int arm_cpuidle_suspend(int index){
 int cpu = smp_processor_id();
     /** If cpu_ops have not been registered or suspend
 * has not been initialized, cpu_suspend call fails early.
 */
 if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_suspend)
 return -EOPNOTSUPP;
 return cpu_ops[cpu]->cpu_suspend(index);
 }
 | 
 cpu_ops到arch-dependent的关联
 以start_kernel为起点,查看从内核开始到获取cpu_ops的路径如下:
    | start_kernel -->setup_arch
 -->cpu_read_bootcpu_ops  只获取bootcpu的cpu_ops
 -->cpu_read_bootcpu_ops
 -->cpu_read_ops(0)
 -->smp_init_cpus  获取nonboot cpu的cpu_ops
 -->smp_cpu_setup
 -->cpu_read_ops
 | 
 cpu_read_ops是获取cpu_ops的关键,参数是cpu的序列号,输出是cpu_ops[cpu]。
    | int __init cpu_read_ops(int cpu){
 const char *enable_method = cpu_read_enable_method(cpu);  从DeviceTree获取enable_method字符串
     if (!enable_method)return -ENODEV;
     cpu_ops[cpu] = cpu_get_ops(enable_method);  根据enable_method字符串在supported_cpu_ops获取指针if (!cpu_ops[cpu]) {
 pr_warn("Unsupported enable-method: %s\n", enable_method);
 return -EOPNOTSUPP;
 }
     return 0;}
 | 
 通过cpu0的DeviceTree可以看出enable-method为psci。
 
 支持的cpu_operations有:
    | static const struct cpu_operations *supported_cpu_ops[] __initconst = {&smp_spin_table_ops,
 &cpu_psci_ops,
 NULL,
 };
 | 
 所以cpu_ops=&cpu_psci_ops。
 cpu_psci_ops分析
 cpu_psci_ops结构体可以说是cpu_operations和psci_operations之间的桥梁,它将cpu_operations的一系列回调函数映射到psci_operations。
    | const struct cpu_operations cpu_psci_ops = {.name        = "psci",
 #ifdef CONFIG_CPU_IDLE
 .cpu_init_idle    = cpu_psci_cpu_init_idle,  从DeviceTree获取CPU idle状态数据
 .cpu_suspend    = cpu_psci_cpu_suspend,  根据是否丢失上下文来选择是psci_ops.cpu_suspend还是cpu_suspend
 #endif
 .cpu_init    = cpu_psci_cpu_init,  为空
 .cpu_prepare    = cpu_psci_cpu_prepare,  只是判断psci_ops.cpu_on是否存在,不存在则返回错误。
 .cpu_boot    = cpu_psci_cpu_boot,  调用psci_ops.cpu_on
 #ifdef CONFIG_HOTPLUG_CPU
 .cpu_disable    = cpu_psci_cpu_disable,  检查是否支持psci_ops.cpu_off。
 .cpu_die    = cpu_psci_cpu_die,  调用psci_ops.cpu_off
 .cpu_kill    = cpu_psci_cpu_kill,  检查指定cpu是否已经被kill
 #endif
 }
 | 
 cpu_psci_cpu_boot
    | static int cpu_psci_cpu_boot(unsigned int cpu){
 int err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_entry));
 if (err)
 pr_err("failed to boot CPU%d (%d)\n", cpu, err);
     return err;}
 | 
 CPU_ON用于secondary boot、hotplug或者big.LITTLE迁移。如果需要从一个核启动另一个核,通过CPU_ON提供一个入口地址和上下文标识。
 PSCI负责完成启动一个核所需的操作,并让它从提供的入口地址开始执行,此时上下文标识必须存放在R0或者W0中。这里的入口地址就对应secondary_entry。
 在arch/arm64/kernel/head.S中:
    | secondary_entry—>secondary_startup—>__secondary_switched—>secondary_start_kernel 
 ENTRY(secondary_entry)bl    el2_setup            // Drop to EL1
 bl    set_cpu_boot_mode_flag
 b    secondary_startup
 ENDPROC(secondary_entry)
 ENTRY(secondary_startup)/*
 * Common entry point for secondary CPUs.
 */
 adrp    x25, idmap_pg_dir
 adrp    x26, swapper_pg_dir
 bl    __cpu_setup            // initialise processor
     ldr    x21, =secondary_dataldr    x27, =__secondary_switched    // address to jump to after enabling the MMU
 b    __enable_mmu
 ENDPROC(secondary_startup)
 ENTRY(__secondary_switched)ldr    x0, [x21]            // get secondary_data.stack
 mov    sp, x0
 mov    x29, #0
 b    secondary_start_kernel
 ENDPROC(__secondary_switched)
 | 
 在secondary_start_kernel中,先调用.cpu_postboot回调函数进行boot后处理,再将CPU设置为online,最后通过cpu_startup_entry进入idle线程。
 cpu_psci_cpu_init_idle
    | static int __maybe_unused cpu_psci_cpu_init_idle(unsigned int cpu){
 int i, ret, count = 0;
 u32 *psci_states;
 struct device_node *state_node, *cpu_node;
     cpu_node = of_get_cpu_node(cpu, NULL);if (!cpu_node)
 return -ENODEV;
     /** If the PSCI cpu_suspend function hook has not been initialized
 * idle states must not be enabled, so bail out
 */
 if (!psci_ops.cpu_suspend)
 return -EOPNOTSUPP;
     /* Count idle states */while ((state_node = of_parse_phandle(cpu_node, "cpu-idle-states",
 count))) {
 count++;
 of_node_put(state_node);
 }
     if (!count)return -ENODEV;
     psci_states = kcalloc(count, sizeof(*psci_states), GFP_KERNEL);if (!psci_states)
 return -ENOMEM;
     for (i = 0; i < count; i++) {u32 state;
         state_node = of_parse_phandle(cpu_node, "cpu-idle-states", i);         ret = of_property_read_u32(state_node,"arm,psci-suspend-param",
 &state);
 if (ret) {
 pr_warn(" * %s missing arm,psci-suspend-param property\n",
 state_node->full_name);
 of_node_put(state_node);
 goto free_mem;
 }
         of_node_put(state_node);pr_debug("psci-power-state %#x index %d\n", state, i);
 if (!psci_power_state_is_valid(state)) {
 pr_warn("Invalid PSCI power state %#x\n", state);
 ret = -EINVAL;
 goto free_mem;
 }
 psci_states[i] = state;
 }
 /* Idle states parsed correctly, initialize per-cpu pointer */
 per_cpu(psci_power_state, cpu) = psci_states;
 return 0;
 free_mem:kfree(psci_states);
 return ret;
 }
 | 
 1.解析DeviceTree中cpu下的cpu-idle-states属性
 
 2.从每个state中获取arm,psci-suspend-param的参数,并验证是否有效。
 
 3.初始化per-CPU类型的指针psci_power_state。
 cpu_psci_cpu_suspend
    | static int __maybe_unused cpu_psci_cpu_suspend(unsigned long index){
 int ret;
 u32 *state = __this_cpu_read(psci_power_state);  从psci_power_state中读取suspend的state参数。
 /*
 * idle state index 0 corresponds to wfi, should never be called
 * from the cpu_suspend operations
 */
 if (WARN_ON_ONCE(!index))
 return -EINVAL;
     if (!psci_power_state_loses_context(state[index - 1]))ret = psci_ops.cpu_suspend(state[index - 1], 0);
 else
 ret = cpu_suspend(index, psci_suspend_finisher);
     return ret;}
 | 
  
 psci_ops
 由于acpi_disabled,所以psci通过DeviceTree获取相关参数。
    | start_kernel -->setup_arch
 -->psci_dt_init  这个函数在cpu_ops之前,因为cpu_ops依赖psci_ops
 | 
 psci有不同版本,需要通过DeviceTree获取版本信息和使用的method(是smc还是hvc)。
 
 通过查看DeviceTree可以看到对应的是psci_0_2_init。
    | static const struct of_device_id const psci_of_match[] __initconst = {{ .compatible = "arm,psci",    .data = psci_0_1_init},
 { .compatible = "arm,psci-0.2",    .data = psci_0_2_init},
 { .compatible = "arm,psci-1.0",    .data = psci_0_2_init},
 {},
 };
 | 
 psci_dt_init解析DeviceTree执行对应psci版本的初始化函数。
    | int __init psci_dt_init(void){
 struct device_node *np;
 const struct of_device_id *matched_np;
 psci_initcall_t init_fn;
     np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np);     if (!np)return -ENODEV;
     init_fn = (psci_initcall_t)matched_np->data;return init_fn(np);
 }
 | 
 psci_0_2_init设置method,然后调用psci_probe:
    | static int __init psci_0_2_init(struct device_node *np){
 int err;
     err = get_set_conduit_method(np);  从DeviceTree可知invoke_psci_fn = __invoke_psci_fn_smc     if (err)goto out_put_node;
 /*
 * Starting with v0.2, the PSCI specification introduced a call
 * (PSCI_VERSION) that allows probing the firmware version, so
 * that PSCI function IDs and version specific initialization
 * can be carried out according to the specific version reported
 * by firmware
 */
 err = psci_probe();
 out_put_node:of_node_put(np);
 return err;
 }
 | 
 psci_probe会调用下面的psci_0_2_set_functions,为0.2及以上版本的PSCI设置回调函数,以及arm_pm_restart和pm_power_off。
    | static void __init psci_0_2_set_functions(void){
 pr_info("Using standard PSCI v0.2 function IDs\n");
 psci_function_id[PSCI_FN_CPU_SUSPEND] =
 PSCI_FN_NATIVE(0_2, CPU_SUSPEND);
 psci_ops.cpu_suspend = psci_cpu_suspend;
     psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF;psci_ops.cpu_off = psci_cpu_off;
     psci_function_id[PSCI_FN_CPU_ON] = PSCI_FN_NATIVE(0_2, CPU_ON);psci_ops.cpu_on = psci_cpu_on;
     psci_function_id[PSCI_FN_MIGRATE] = PSCI_FN_NATIVE(0_2, MIGRATE);psci_ops.migrate = psci_migrate;
     psci_ops.affinity_info = psci_affinity_info;     psci_ops.migrate_info_type = psci_migrate_info_type;     arm_pm_restart = psci_sys_reset;     pm_power_off = psci_sys_poweroff;}
 | 
 这些函数都有一个共性:最终都通过invoke_psci_fn发起调用,例如invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0);在这里invoke_psci_fn指向__invoke_psci_fn_smc。
 __invoke_psci_fn_smc指向arch/arm64/kernel/psci-call.S定义的函数:
    | /* int __invoke_psci_fn_smc(u64 function_id, u64 arg0, u64 arg1, u64 arg2) */ENTRY(__invoke_psci_fn_smc)
 smc    #0
 ret
 ENDPROC(__invoke_psci_fn_smc)
 | 
 http://infocenter.arm.com/help/topic/com.arm.doc.den0022c/DEN0022C_Power_State_Coordination_Interface.pdf Chapter5有PSCI函数原型和相关参数、返回值的介绍。
 第一个参数是Function ID,后面三个参数作为该Function ID对应函数的参数。如果使用32位调用约定(SMC32),Function ID通过r0传递,后三个参数通过r1-r3传递,r0存放返回值;如果使用64位调用约定(SMC64),Function ID通过w0传递,后三个参数通过x1-x3传递,x0存放返回值。这些Function ID的实现在对应的Firmware中,但是可以通过上述pdf查看输入输出细节。
 PSCI除了提供psci_ops的回调函数之外,还提供用于restart和power off的arch-dependent函数arm_pm_restart和pm_power_off。
 比如machine_power_off和machine_restart调用:
    | void machine_power_off(void){
 local_irq_disable();
 smp_send_stop();
 if (pm_power_off)
 pm_power_off();
 }
 void machine_restart(char *cmd){
 /* Disable interrupts first */
 local_irq_disable();
 smp_send_stop();
     /** UpdateCapsule() depends on the system being reset via
 * ResetSystem().
 */
 if (efi_enabled(EFI_RUNTIME_SERVICES))
 efi_reboot(reboot_mode, NULL);
     /* Now call the architecture specific reboot code. */if (arm_pm_restart)
 arm_pm_restart(reboot_mode, cmd);
 else
 do_kernel_restart(cmd);
     /** Whoops - the architecture was unable to reboot.
 */
 printk("Reboot failed -- System halted\n");
 while (1);
 }
 
 | 
   
 参考文档
 Linux CPU core的电源管理(3)_cpu ops:http://www.wowotech.net/pm_subsystem/cpu_ops.html
cpu_ops、cpu_psci_ops、psci_ops、suspend_ops以及arm_idle_driver
原文:http://www.cnblogs.com/arnoldlu/p/6344847.html