Project Home
Project Home
Documents
Documents
Wiki
Wiki
Discussion Forums
Discussions
Project Information
Project Info
Forum Topic - kernel reading notes: (22 Items)
   
kernel reading notes  
1) what is RUNCPU and KERNCPU 
   x86 as example:

  2 dual-core system with hyper-threading(pay attention to apic id)

Processor 0, Speed 2793 Mhz, CPUID: GenuineIntel
Genuine Intel® CPU 2.80GHz
Family 15, Model 4, Stepping 8
Feature flags 0000649D:BFEBFBFF
L1 cache: 28K Bytes, L2 cache: 2048K Bytes
Processor Initial APIC ID: 0
Processor 1, Speed 2793 Mhz, CPUID: GenuineIntel
Genuine Intel® CPU 2.80GHz
Family 15, Model 4, Stepping 8
Feature flags 0000649D:BFEBFBFF
L1 cache: 28K Bytes, L2 cache: 2048K Bytes
Processor Initial APIC ID: 6
Processor 2, Speed 2793 Mhz, CPUID: GenuineIntel
Genuine Intel® CPU 2.80GHz
Family 15, Model 4, Stepping 8
Feature flags 0000649D:BFEBFBFF
L1 cache: 28K Bytes, L2 cache: 2048K Bytes
Processor Initial APIC ID: 2
Processor 3, Speed 2793 Mhz, CPUID: GenuineIntel
Genuine Intel® CPU 2.80GHz
Family 15, Model 4, Stepping 8
Feature flags 0000649D:BFEBFBFF
L1 cache: 28K Bytes, L2 cache: 2048K Bytes
Processor Initial APIC ID: 4
Processor 4, Speed 2793 Mhz, CPUID: GenuineIntel
Genuine Intel® CPU 2.80GHz
Family 15, Model 4, Stepping 8
Feature flags 0000649D:BFEBFBFF
L1 cache: 28K Bytes, L2 cache: 2048K Bytes
Processor Initial APIC ID: 1
Processor 5, Speed 2793 Mhz, CPUID: GenuineIntel
Genuine Intel® CPU 2.80GHz
Family 15, Model 4, Stepping 8
Feature flags 0000649D:BFEBFBFF
L1 cache: 28K Bytes, L2 cache: 2048K Bytes
Processor Initial APIC ID: 7
Processor 6, Speed 2793 Mhz, CPUID: GenuineIntel
Genuine Intel® CPU 2.80GHz
Family 15, Model 4, Stepping 8
Feature flags 0000649D:BFEBFBFF
L1 cache: 28K Bytes, L2 cache: 2048K Bytes
Processor Initial APIC ID: 3
Processor 7, Speed 2793 Mhz, CPUID: GenuineIntel
Genuine Intel® CPU 2.80GHz
Family 15, Model 4, Stepping 8
Feature flags 0000649D:BFEBFBFF
L1 cache: 28K Bytes, L2 cache: 2048K Bytes
Processor Initial APIC ID: 5


This matters when you read the kernel source, or a kernel crash dump that mentions [RUNCPU,KERNCPU]. We are talking
about SMP here: KERNCPU is the cpu number read from cpunum, and cpunum is assigned when a cpu switches into kernel
mode. RUNCPU is a function call that returns the current cpu number, derived from tr (the task register, on x86).
Both ultimately come from tr, but under some conditions KERNCPU differs from RUNCPU: for example, cpu 1 has entered
kernel state (now) while cpu 2 hits some fault condition and calls crash(); crash() then runs on a cpu where KERNCPU
and RUNCPU are different.
Re: kernel reading notes  
Is that ok to post code here?
Re: kernel reading notes  
Yao Zhao wrote:
> Is that ok to post code here?

If it doesn't violate someone's copyright, I don't see why not.
Re: kernel reading notes  
Yao Zhao wrote:
> 1) what is RUNCPU and KERNCPU

RUNCPU is the one you're currently running on. KERNCPU is the cpu that has entered the kernel.

If cpu 0 has taken an interrupt while cpu 2 is in the kernel

seen from cpu 0:  RUNCPU==0, KERNCPU==2

seen from cpu 2:  RUNCPU==2, KERNCPU==2
Re: kernel reading notes  
Thanks Attila, that is my conclusion please!
Re: kernel reading notes  
/*
 * SMP helper macros.
 * KERNCPU: number of the cpu that has entered the kernel (the value of cpunum).
 * RUNCPU:  number of the cpu the current code is running on.
 */
#if defined(WANT_SMP_MACROS) || defined(VARIANT_smp)
	#define KERNCPU					((unsigned)cpunum)
#else
	#define	KERNCPU					0
#endif
#if defined(VARIANT_smp)
	/* SMP build: PROCESSORS_MAX defaults to 8 unless the build already defined it */
	#ifndef PROCESSORS_MAX
	   #define PROCESSORS_MAX		8
	#endif
	#define NUM_PROCESSORS			num_processors	/* yzhao: num_processors is assigned in syspage_init(), from __syspage_ptr->num_cpu */
	#define RUNCPU					get_cpunum()
	/* Spin reading (spin)->value until it is 0, then try to swap in 1 with _smp_xchg;
	 * start over unless the swap returned 0.
	 * NOTE(review): presumably _smp_xchg is an atomic exchange returning the old value -- confirm. */
	#define SPINLOCK(spin)			do { while((spin)->value); } while(_smp_xchg(&(spin)->value, 1) != 0)
	#define SPINUNLOCK(spin)		((spin)->value = 0)
	#define SENDIPI(cpu,cmd)		send_ipi(cpu,cmd)
	/* NOTE(review): expands to a bare `if` statement, so it will capture a following `else`
	 * when used unbraced inside an if/else. */
	#define SMP_FLUSH_TLB()			if(num_processors > 1) smp_flush_tlb()
	/* Declares an intrspin_t variable `var` with storage class `class` */
	#define SMP_SPINVAR(class,var)	class intrspin_t var
	#define INTR_LOCK(s)			InterruptLock(s)
	#define INTR_UNLOCK(s)			InterruptUnlock(s)
#else
	/* Non-SMP build: a single cpu numbered 0; the spinlock, IPI and TLB macros expand to nothing,
	 * and INTR_LOCK/INTR_UNLOCK ignore their argument. */
	#define PROCESSORS_MAX			1
	#define NUM_PROCESSORS			1
	#define RUNCPU					0
	#define SPINLOCK(spin)			
	#define SPINUNLOCK(spin)		
	#define SENDIPI(cpu,cmd)		
	#define SMP_FLUSH_TLB()		
	#define SMP_SPINVAR(class,var)
	#define INTR_LOCK(s)			InterruptDisable()
	#define INTR_UNLOCK(s)			InterruptEnable()
#endif
So on a UP (non-SMP) build both of them are always 0, because there is only one cpu :) (unless WANT_SMP_MACROS is defined, in which case KERNCPU still reads cpunum).

/* yzhao: we are still talking about RUNCPU.
 * get_cpunum is defined in ker/arch/smp/smp_get_cpunum.S; only the x86 version is shown here,
 * the other architectures are similar (per yzhao).
 *
 * get_cpunum: the function behind the SMP definition of RUNCPU (RUNCPU expands to get_cpunum()).
 * It takes no arguments; GETCPU leaves the cpu number in %eax and the function returns.
 */
.globl get_cpunum
get_cpunum:
	/* %ax receives the task-register selector, %eax holds the finished cpu number */
	GETCPU	%ax, %eax
	ret
  /* yzhao: so get_cpunum is just a function that invokes GETCPU, using the ax register as the temporary register. */

	// GETCPU reg1, reg2 -- compute the current cpu number into reg2.
	// reg1 must be the 16-bit low half of the 32-bit register reg2
	// (it is invoked as "GETCPU %ax, %eax" and "GETCPU %bp, %ebp").
	// Result: reg2 = (task-register selector >> 3) - 32. Arithmetic flags are modified.
	.macro GETCPU, reg1, reg2
//		&seg
//		mov	lapicid_addr,&reg2	// LAPIC id register address
//		mov	(&reg2),&reg2			// Read the id
//		shr	$24,&reg2			// Id is in bits 27-24
		xor	&reg2,&reg2		// clear all 32 bits of reg2 before its low 16 bits are loaded
		str	&reg1			// reg1 = segment selector held in the task register
		shr	$3,&reg2		// drop the 3 low selector bits (RPL/TI) -> descriptor index
		sub	$32,&reg2		// per-cpu TSS descriptors sit at gdt[32+cpu], so index-32 = cpu number
	.endm

	/* If INTR_GENFLAG_LOAD_CPUNUM is requested, emit code that puts the cpu number in %esi.
	 * NOTE(review): presumably part of the generated interrupt-entry code -- confirm against the caller. */
	if(genflags & INTR_GENFLAG_LOAD_CPUNUM) {
		if(NUM_PROCESSORS == 1) {
			/* only one cpu: the number is the constant 0 */
			gen_load(REGNO_ESI, 0);
		} else {
			/* Pre-assembled bytes for the same xor/str/shr/sub sequence as the GETCPU macro,
			 * with %si/%esi as the register pair. */
			static uint8_t cpunum_burst[] = {
				0x31, 0xf6,			// xor %esi,%esi
				0x0f, 0x00, 0xce,	// str %si
				0xc1, 0xee, 0x03,	// shr $3,%esi
				0x83, 0xee, 0x20,	// sub $0x20,%esi
			};

			add_code(cpunum_burst, sizeof(cpunum_burst));
		}
	}

  /* yzhao: so the cpu number clearly comes from the task register: the result is left in reg2, whose low 16 bits
(reg1) are loaded from the task register by str. */

/* yzhao let's take a look at init_cpu() it will tell us more*/
void
init_cpu() {
	static int						cr0 = 0;
	static paddr32_t				cr3;
	int 							cpu;
	struct x86_seg_descriptor_entry	*sdp;
	X86_TSS							*tssp;
	uintptr_t						 addr;

	//  EM MP NE ET
	//   1  0  1  0   No FPU
	//   0  1  1  1   FPU == 387
	//   0  1  1  x   FPU > 387

	// The BIOS will have correctly set up the various cache options for the
	// boot processor only. Therefore we use it as a model for all other
	// processors.
	if(cr0 == 0) {
		cr0 = rdcr0() & ~(X86_MSW_EM_BIT | X86_MSW_MP_BIT);
		cr0 |= X86_MSW_TS_BIT | X86_MSW_NE_BIT | X86_MSW_AM_BIT;

		if((__cpu_flags & CPU_FLAG_FPU) && !fpuemul) {
			cr0 |= X86_MSW_MP_BIT;
		} else {
			cr0 |= X86_MSW_EM_BIT;
			set_trap(0x100 | 0x07, __fpuemu_stub);
		}

		if(SYSPAGE_ENTRY(cpuinfo)->cpu == 386) {
			cr0 |= X86_MSW_ET_BIT;
		}
		cr3 = rdpgdir();

	}
	ldcr0(cr0);
	ldpgdir(cr3);

#if defined(VARIANT_smp)
	cpu = init_send_ipi();
#else
	cpu = 0;
#endif

	tssp = _scalloc(sizeof(struct x86_tss_entry));
	tss[cpu] = tssp;
	tssp->ss0 = ker_ss;
	tssp->esp0 = 0; // Temp value till first thread runs
	tssp->pdbr = (unsigned)rdpgdir();

	addr = (unsigned) tssp;
	sdp = &_syspage_ptr->un.x86.gdt[32+cpu]; /*yzhao? it seemed gdt already been filled in in bios.s*/
	sdp->flags = X86_TYPE_TSS32_SEG | X86_DPL1;
	sdp->base_lo = addr & 0xffff;
	sdp->base_hi = (addr >> 16) &...
View Full Message
Re: kernel reading notes  
// __ker_entry: kernel-call entry stub.
// Saves the registers, works out the current cpu number, then jumps to __common_ker_entry with:
//   %ebp = cpu number (from GETCPU)
//   %edx = pointer to the caller's arguments on the user stack
//   %ebx = %esp - REG_OFF ("actives")
//   %esp = value loaded from ker_stack, indexed by cpu number
//          (NOTE(review): presumably one kernel stack pointer per cpu -- confirm SMPREF)
__ker_entry:
	PUSHREG				/ Save all registers in thread register save area
	// %ebp = current cpu number; it is compared against need_to_run_cpu in __common_ker_entry
	GETCPU	%bp, %ebp

	mov	SIZEOF_REG-(2*4)(%esp),%edx	/ Pointer to users stack frame
	add	$4,%edx				/ Step over return address, point at arguments
	
	lea		-REG_OFF(%esp),%ebx	/ Recover actives
	
	mov		SMPREF(ker_stack,%ebp,4),%esp      // switch to the kernel stack
	
	jmp		__common_ker_entry

Re: kernel reading notes  
// __common_ker_entry: common kernel-call path.
// As entered from __ker_entry: %eax = kernel call number, %ebp = current cpu number,
// %ebx = base for the SYSCALL/TFLAGS stores below, %edx = pointer to the call arguments.
// On SMP the cpu first has to win the INKERNEL_NOW bit in `inkernel` (one cpu at a time),
// then the call is dispatched through the call table.
__common_ker_entry:
#ifdef VARIANT_instr
	SAVE_PERFREGS 0
#endif

#ifdef	VARIANT_smp


	mov		%eax,%esi	// save kernel call number (yzhao: in userspace every kernel call puts its call number in eax, see ker_call_table)
aquire_kernel_attempt:
	sti						// the wait loop below runs with interrupts enabled
    // Wait for need to run to clear if we're not on the right CPU. 
1:
    cmpl    $0,need_to_run	/*yzhao: is need_to_run == 0? */
	jz		3f			/*yzhao: need_to_run==0 means no thread needs to run and preempt me, so go ahead to 3 */
	cmpl    %ebp,need_to_run_cpu /*yzhao: need_to_run!=0, so check whether need_to_run_cpu is the current cpu */
	je		3f					/*yzhao: need_to_run_cpu is the current cpu, jump to 3*/
	pause						/*yzhao: pause is what Intel recommends inside a spin-wait loop
										(Xeon, P4 and dual cores) */
	jmp		1b

   	// See if anybody else is in the kernel
3: 
	mov		inkernel,%eax		// %eax = snapshot of inkernel; cmpxchg below compares against it
	test	$INKERNEL_NOW,%eax /*yzhao: computes eax & INKERNEL_NOW and sets SF,ZF,PF from the result (eax unchanged)*/
	jnz		1b				/*yzhao: if (eax & INKERNEL_NOW) another cpu is in the kernel, go back to 1*/


	cli						/*yzhao: disable irq*/
end_aquire_kernel_attempt:

	mov		%eax,%edi		/*yzhao: copy the inkernel snapshot to edi */
	andl	$0x00ffffff,%edi		/*yzhao: keep the lower 24 bits, dropping the old cpu number in bits 31-24*/
	mov 	%ebp,%ecx		// ecx = current cpu number
	shl		$24,%ecx		/*yzhao: move the cpu number into bits 31-24 */
	orl		%edi,%ecx		/ Set cpunum

	orl		$INKERNEL_NOW,%ecx	/*yzhao: ecx = (cpunum<<24) | low 24 bits of inkernel | INKERNEL_NOW*/
	lock; cmpxchg	%ecx,inkernel	/* atomic: if inkernel still equals eax (the snapshot), store ecx into it and set ZF;
										otherwise load the current inkernel into eax and clear ZF */
	jnz		aquire_kernel_attempt	/* ZF clear: inkernel changed since the snapshot was taken, so start over */
	// We are the kernel
	mov		%esi,%eax	// restore kernel call number
#else
	LOCKOP
	orl		$INKERNEL_NOW,inkernel	/ In the kernel (restartable)
#endif

	sti						/*yzhao: enable irq*/
	cld

	// record the call number and clear the three per-call flag bits in TFLAGS
	mov		%eax,SYSCALL(%ebx)
	mov		TFLAGS(%ebx),%ecx
	and		$~(_NTO_TF_KERERR_SET+_NTO_TF_BUFF_MSG+_NTO_TF_KERERR_LOCK),%ecx
	mov		%ecx,TFLAGS(%ebx)
	// call numbers >= __KER_BAD (unsigned compare) go to bad_func
	cmp		$__KER_BAD,%eax
	jae		bad_func
	// arguments for the handler: %edx pushed first, then %ebx
	push	%edx
	push	%ebx
	// indirect call through the table entry for this call number (4-byte entries)
#if defined(VARIANT_instr)
	call	*_trace_call_table(,%eax,4)
#else
	call	*ker_call_table(,%eax,4)
#endif
	/ assuming that none of the kernel routines modify the 'act' parm
	/ on the stack
	pop		%ebx

	// a handler return value >= 0 (signed) goes to set_err
	test	%eax,%eax
	jge		set_err
Re: kernel reading notes  
conclusion:
KERNCPU kernel on which cpu.
RUNCPU which cpu current code running on.
Re: kernel reading notes  
Yao Zhao wrote:
> conclusion:
> KERNCPU kernel on which cpu.
> RUNCPU which cpu current code running on.

Yup. That's my understanding too.
Re: kernel reading notes  
From the code here you can see that only one cpu at a time can enter kernel state, i.e. get INKERNEL_NOW, for a system
call. This is why KERNCPU makes sense. It means there is a big kernel lock and the kernel is not fully SMP-capable,
although interrupts and exceptions may not need this lock.

The current kernel code relies on this heavily. I guess it is not easy to make it more SMP-capable, but it is possible.
Need a project?

The lock (INKERNEL_NOW) is released in __ker_exit.
 
// Acquire loop from __common_ker_entry (SMP): wait until this cpu may enter the kernel,
// then claim INKERNEL_NOW in `inkernel` with a compare-and-exchange.
// %ebp = current cpu number, %esi = saved kernel call number.
1:
    cmpl    $0,need_to_run	/*yzhao: is need_to_run == 0? */
	jz		3f			/*yzhao: need_to_run==0 means no thread needs to run and preempt me, so go ahead to 3 */
	cmpl    %ebp,need_to_run_cpu /*yzhao: need_to_run!=0, so check whether need_to_run_cpu is the current cpu */
	je		3f					/*yzhao: need_to_run_cpu is the current cpu, jump to 3*/
	pause						/*yzhao: pause is what Intel recommends inside a spin-wait loop
										(Xeon, P4 and dual cores) */
	jmp		1b

   	// See if anybody else is in the kernel
3: 
	mov		inkernel,%eax		// %eax = snapshot of inkernel; cmpxchg below compares against it
	test	$INKERNEL_NOW,%eax /*yzhao: computes eax & INKERNEL_NOW and sets SF,ZF,PF from the result (eax unchanged)*/
	jnz		1b				/*yzhao: if (eax & INKERNEL_NOW) another cpu is in the kernel, go back to 1*/


	cli						/*yzhao: disable irq*/
end_aquire_kernel_attempt:

	mov		%eax,%edi		/*yzhao: copy the inkernel snapshot to edi */
	andl	$0x00ffffff,%edi		/*yzhao: keep the lower 24 bits, dropping the old cpu number in bits 31-24*/
	mov 	%ebp,%ecx		// ecx = current cpu number
	shl		$24,%ecx		/*yzhao: move the cpu number into bits 31-24 */
	orl		%edi,%ecx		/ Set cpunum

	orl		$INKERNEL_NOW,%ecx	/*yzhao: ecx = (cpunum<<24) | low 24 bits of inkernel | INKERNEL_NOW*/
	lock; cmpxchg	%ecx,inkernel	/* atomic: if inkernel still equals eax (the snapshot), store ecx into it and set ZF;
										otherwise load the current inkernel into eax and clear ZF */
	jnz		aquire_kernel_attempt	/* ZF clear: inkernel changed since the snapshot was taken, so start over */
	// We are the kernel
	mov		%esi,%eax	// restore kernel call number
RE: kernel reading notes  
 

> -----Original Message-----
> From: Yao Zhao [mailto:yzhao@qnx.com] 
> Sent: September 18, 2007 11:36 AM
> To: ostech-core_os
> Subject: Re: kernel reading notes
> 
> Because of code here then you know only one cpu can enter 
> kernel state or get INKERNEL_NOW for system call. This makes 
> KERN_CPU sense. It means there is a big kernel lock and 
> kernel is not quite smp capable although interrupt, 
> exceptions may not need this. 
> 
> Current kernel code relies this too much, I guess it is not 
> easy to make it more smp capable but it is possible. Need a project?

[ Code comments snipped ]

Yao,

I'm not sure what your intention is with these posts?  

* Are you trying to fix a particular problem; if so then can you
create a post with a more accurate synopsis/title.

* Are you simply relaying your own personal notes from having read 
through the sources; if so, then perhaps contributing the commentary
to an appropriate wiki page would be better

* Are you doing something else?

This forum thread seems to be kind of random, and with your statement
above, I'm not sure where you are headed.

Thanks,
 Thomas
  
Re: RE: kernel reading notes  
how to edit my past posts?
Re: RE: kernel reading notes  
can any member in myqnx create wiki in myqnx?
Re: kernel reading notes  
Yao Zhao wrote:
> can any member in myqnx create wiki in myqnx?

If you have joined the OS project, yes.

The wiki tells me that you have not joined the project. So you can't edit
wiki pages yet.

There  should be a "join" button on the OS project page:
http://community.qnx.com/sf/projects/core_os

Once you've joined and logged in through myqnx, you should be able to
edit the new "Kernel Reading Notes" page that I've created:

<http://community.qnx.com/sf/wiki/do/viewPage/projects.core_os/wiki/KernelReadingNotes?_message=1190152282694>;


I've created that page as an empty template, as a suggestion for how we could capture code comments.

To start, you might want to try adding some of your stuff under the x86/kernel.s heading.

-ad
test please ignore  
testing if replying by email to a posting here causes bounced-email reports.
RE: RE: kernel reading notes  
 

> -----Original Message-----
> From: Yao Zhao [mailto:yzhao@qnx.com] 
> Sent: September 18, 2007 1:38 PM
> To: ostech-core_os
> Subject: Re: RE: kernel reading notes
> 
> how to edit my past posts?

You can't.  You can delete and re-post but you
can't edit the messages themselves.

Thanks,
 Thomas
Re: RE: RE: kernel reading notes  
Sorry! I didn't find a "delete" button either.
I will delete them and post to a wiki page.
Re: kernel reading notes  
Yao Zhao wrote:
>
> Because of code here then you know only one cpu can enter kernel state 
> or get INKERNEL_NOW for system call. This makes KERN_CPU sense. It 
> means there is a big kernel lock and kernel is not quite smp capable 
> although interrupt, exceptions may not need this.
>
This is by design.  The amount of time spent in a microkernel kernel 
call is typically so short as to not be an issue.  Interrupts
acquire the kernel lock after the EOI, in order to process any event 
deliveries/scheduling decisions that were caused during interrupt
processing.

> Current kernel code relies this too much, I guess it is not easy to 
> make it more smp capable but it is possible. Need a project?
>
> it will be released in __ker_exit.
>  
> 1:
>     cmpl    $0,need_to_run      /*yzhao need_to_run thread == 0? */
>         jz              3f                      /*yzhao 
> need_to_run==0, that means no thread need to run and preempt me, just 
> go ahead, jump to 3 */
>
>         cmpl    %ebp,need_to_run_cpu /*yzhao need_to_run!=0, compare 
> current cpu == need_to_run_cpu? */
>         je              3f                                      
> /*yzhao need_to_run_cpu is current cpu, jump to 3*/
>         pause                                           /*yzhao pause, 
> it was suggested by Intel for spin-loop wait, in Intel Xeon, P4 and 
> dual cores
>
>                                                                 
>                 we have to */
>         jmp             1b
>
>         // See if anybody else is in the kernel
> 3:
>         mov             inkernel,%eax
>         test    $INKERNEL_NOW,%eax /*yzhao and immed32, eax, set 
> SF,ZF,PF according to result*/
>         jnz             1b                              /*yzhao if 
> (eax & INKERNAL_NOW) jmp 1b*/
>
>
>         cli                                             /*yzhao 
> disable irq*/
> end_aquire_kernel_attempt:
>
>         mov             %eax,%edi               /*yzhao mov inkernel 
> to edi */
>         andl    $0x00ffffff,%edi                /*yzhao get edi lower 
> 24 bits*/
>         mov     %ebp,%ecx              
>         shl             $24,%ecx                /*yzhao get cpunum, 
> ebp lower 8 bits represent cpunum */
>         orl             %edi,%ecx               / Set cpunum
>
>         orl             $INKERNEL_NOW,%ecx      /*yzhao 
> cpunum|inkernel|INKERNEL_NOW*/
>         lock; cmpxchg   %ecx,inkernel   /*yzhao lock bus and xchg ecx 
> and inkernel*/
>         jnz             aquire_kernel_attempt   /*yzhao if ecx is not 
> same with inkernel, again */
>         // We are the kernel
>         mov             %esi,%eax       // restore kernel call number
>
> _______________________________________________
> OSTech
> http://community.qnx.com/sf/go/post1353
>

-- 
cburgess@qnx.com

Re: kernel reading notes  
Thanks Colin!
Re: kernel reading notes  
 UP code flow(boot)

hardware/startup/lib/_main.c

                _main
                  |
               startnext
                  |
              cpu_startnext
                  |
               exec386
                  |
 services/system/ker/x86/_cstart_.S _start
                  |
   services/system/ker/_main.c: _main
                  |
               kernel_main
                  |
               init_objects
                  |
                  idle
Re: kernel reading notes  
 smp code flow(boot)

other cpus boot up procedure:

             smp_start
                 |
            cpu_startnext 
                 |
           smp_spin_vaddr (in init_smp we initialized smp_spin_vaddr = smp_spin)
                 |
             smp_spin ( hardware/startup/lib/x86/callout_apstart.S)
                 |
             
              _smpstart
                  |
              init_smp
                  |
               ker_start

In a nutshell: the first cpu goes through the normal boot (see UP) and calls smp_hook_rtn (start_aps). start_aps loops,
calling board_smp_start(i, smp_start); on x86 board_smp_start is just apic_smp_start. The first call of
board_smp_start copies the code between apstart16_start and apstart16_end to a block of low memory and sets
apstart32_ptr to smp_start. It then writes INIT and STARTUP to the APIC, and the target cpu starts running at smp_start
(actually at apstart16_start, which then jumps into 32-bit protected mode). smp_spin finally jumps to
struct x86_smpinfo_entry.ap_start_addr, which is _smpstart.

hardware/startup/lib/x86/cstart32.S
                _start
                   |
                 _main
           /                                                    |                |
hardware/startup/boards/bios/main.c                                 
                main                                          smp_hook_rtn   startnext
                  |
hardware/startup/lib/init_smp.c 
             init_smp(	
        smp_hook_rtn = start_aps; 
/* yzhao this will be called in startup/lib/_main.c _main */

	smp_spin_vaddr = (void (*)(void))&smp_spin;
        //Hook into the sizing/writing list
	smp_prev_output_rtn = callout_output_rtn;
	callout_output_rtn = 
          (output_callout_t *)callout_output_smp;