/* Copyright (C) 2004  David Decotigny

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; either version 2
   of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
   USA. 
*/
#include <sos/physmem.h>
#include <sos/klibc.h>
#include <sos/assert.h>

#include "mm_context.h"

#include "paging.h"


/*
 * Important NOTICE concerning the use of the reference & occupation
 * counters of the physical pages by the "paging" subsystem:
 *   - All the kernel PTs are SHARED. This means that as soon as one
 *     kernel PT belongs to one mm_context, it belongs to ALL the
 *     mm_contexts. We don't update the real reference count of the PTs
 *     accordingly, because that would require updating the reference
 *     counts of ALL the kernel PTs as soon as a new mm_context is
 *     created, or as soon as an mm_context is destroyed. This way, the
 *     reference count stays constant regardless of the actual number of
 *     PDs sharing them.
 *   - We do NOT maintain the occupation count of the PDs. This would add
 *     a little overhead for no benefit.
 *   - We DO maintain the occupation count of ALL the PTs: it represents
 *     the number of PTEs allocated in the PT.
 */
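
/*
 * Illustration of this policy (explanatory example, not from the original
 * sources): consider a user PT that currently holds 3 allocated PTEs.
 *   - Its occupation count is 3; each sos_paging_unmap() of one of these
 *     pages decrements it, and when it reaches 0 the PT page itself is
 *     unreferenced and can be freed.
 *   - A kernel PT, on the other hand, keeps a constant reference count
 *     no matter how many PDs actually point to it.
 */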


/** The structure of a page directory entry. See Intel vol 3 section
    3.6.4 */
struct x86_pde
{
  sos_ui32_t present        :1; /* 1=PT mapped */
  sos_ui32_t write          :1; /* 0=read-only, 1=read/write */
  sos_ui32_t user           :1; /* 0=supervisor, 1=user */
  sos_ui32_t write_through  :1; /* 0=write-back, 1=write-through */
  sos_ui32_t cache_disabled :1; /* 1=cache disabled */
  sos_ui32_t accessed       :1; /* 1=read/write access since last clear */
  sos_ui32_t zero           :1; /* Intel reserved */
  sos_ui32_t page_size      :1; /* 0=4kB, 1=4MB or 2MB (depending on PAE) */
  sos_ui32_t global_page    :1; /* Ignored (Intel reserved) */
  sos_ui32_t custom         :3; /* Do what you want with them */
  sos_ui32_t pt_paddr       :20;
} __attribute__ ((packed));


/** Intermediate type to speed up PDE copy */
typedef union {
  struct x86_pde pde;
  sos_ui32_t     ui32;
} x86_pde_val_t;
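
/*
 * Worked example (illustrative, not part of the original sources): a PDE
 * describing a page table located at physical address 0x00123000, with
 * present=1, write=1 and all other fields 0, has
 *   pt_paddr = 0x00123000 >> 12 = 0x123   and   write|present = 0x3,
 * so the raw value seen through x86_pde_val_t.ui32 is 0x00123003. The
 * union lets such a PDE be built field by field and then read, stored or
 * compared as a single 32-bit word.
 */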


/** The structure of a page table entry. See Intel vol 3 section
    3.6.4 */
struct x86_pte
{
  sos_ui32_t present        :1; /* 1=page mapped */
  sos_ui32_t write          :1; /* 0=read-only, 1=read/write */
  sos_ui32_t user           :1; /* 0=supervisor, 1=user */
  sos_ui32_t write_through  :1; /* 0=write-back, 1=write-through */
  sos_ui32_t cache_disabled :1; /* 1=cache disabled */
  sos_ui32_t accessed       :1; /* 1=read/write access since last clear */
  sos_ui32_t dirty          :1; /* 1=write access since last clear */
  sos_ui32_t zero           :1; /* Intel reserved */
  sos_ui32_t global_page    :1; /* 1=No TLB invalidation upon cr3 switch
                                   (when PGE set in cr4) */
  sos_ui32_t custom         :3; /* Do what you want with them */
  sos_ui32_t paddr          :20;
} __attribute__ ((packed));


/** Intermediate type to speed up PTE copy */
typedef union {
  struct x86_pte pte;
  sos_ui32_t     ui32;
} x86_pte_val_t;


/** Structure of the x86 CR3 register: the Page Directory Base
    Register. See Intel x86 doc Vol 3 section 2.5 */
struct x86_pdbr
{
  sos_ui32_t zero1          :3; /* Intel reserved */
  sos_ui32_t write_through  :1; /* 0=write-back, 1=write-through */
  sos_ui32_t cache_disabled :1; /* 1=cache disabled */
  sos_ui32_t zero2          :7; /* Intel reserved */
  sos_ui32_t pd_paddr       :20;
} __attribute__ ((packed));


/**
 * Helper macro to control the MMU: invalidate the TLB entry for the
 * page located at the given virtual address. See Intel x86 vol 3
 * section 3.7.
 */
#define invlpg(vaddr) \
  do { \
       __asm__ __volatile__("invlpg %0"::"m"(*((unsigned *)(vaddr)))); \
  } while(0)


/**
 * Helper macro to control the MMU: invalidate the whole TLB. See
 * Intel x86 vol 3 section 3.7.
 */
#define flush_tlb() \
  do { \
        unsigned long tmpreg; \
        asm volatile("movl %%cr3,%0\n\tmovl %0,%%cr3" :"=r" \
                     (tmpreg) : :"memory"); \
  } while (0)


/**
 * Helper macro to compute the index in the PD for the given virtual
 * address
 */
#define virt_to_pd_index(vaddr) \
  (((unsigned)(vaddr)) >> 22)


/**
 * Helper macro to compute the index in the PT for the given virtual
 * address
 */
#define virt_to_pt_index(vaddr) \
  ( (((unsigned)(vaddr)) >> 12) & 0x3ff )


/**
 * Helper macro to compute the offset in the page for the given virtual
 * address
 */
#define virt_to_page_offset(vaddr) \
  (((unsigned)(vaddr)) & SOS_PAGE_MASK)
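
/*
 * Worked example (illustrative, assuming 4kB pages and SOS_PAGE_MASK ==
 * 0xfff): the virtual address 0x00402a10 decomposes into
 *   virt_to_pd_index(0x00402a10)    = 0x00402a10 >> 22           = 1
 *   virt_to_pt_index(0x00402a10)    = (0x00402a10 >> 12) & 0x3ff = 2
 *   virt_to_page_offset(0x00402a10) = 0x00402a10 & 0xfff         = 0xa10
 * i.e. PDE 1 in the PD, PTE 2 in that PT, byte 0xa10 inside the page.
 */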


/**
 * Helper function to map a page in the PD. Assumes that the RAM is
 * identity-mapped, so that the physical address of a PT found in a PD
 * entry can be used directly as a (CPU) virtual address.
 */
static sos_ret_t paging_setup_map_helper(struct x86_pde * pd,
                                         sos_paddr_t ppage,
                                         sos_vaddr_t vaddr)
{
  /* Get the page directory entry and table entry index for this
     address */
  unsigned index_in_pd = virt_to_pd_index(vaddr);
  unsigned index_in_pt = virt_to_pt_index(vaddr);

  /* Make sure the page table was mapped */
  struct x86_pte * pt;
  if (pd[index_in_pd].present)
    {
      pt = (struct x86_pte*) (pd[index_in_pd].pt_paddr << 12);

      /* In practice this branch is never taken, since the setup routine
         scans the kernel pages in a strictly increasing order: at
         each step, the map will result in the allocation of a new PT
         entry. For the sake of clarity, we keep the test here. */
      if (pt[index_in_pt].present)
        SOS_ASSERT_FATAL(FALSE); /* indicate a fatal error */
    }
  else
    {
      /* No : allocate a new one */
      pt = (struct x86_pte*) sos_physmem_ref_physpage_new(FALSE);
      if (! pt)
        return -SOS_ENOMEM;
      
      memset((void*)pt, 0x0, SOS_PAGE_SIZE);

      pd[index_in_pd].present  = TRUE;
      pd[index_in_pd].write    = 1; /* It would be too complicated to
                                       determine whether it
                                       corresponds to a real R/W area
                                       of the kernel code/data or a
                                       read-only one */
      pd[index_in_pd].pt_paddr = ((sos_paddr_t)pt) >> 12;
    }

  
  /* Map the page in the page table */
  pt[index_in_pt].present = 1;
  pt[index_in_pt].write   = 1;  /* It would be too complicated to
                                   determine whether it corresponds to
                                   a real R/W area of the kernel
                                   code/data or a read-only one */
  pt[index_in_pt].user    = 0;
  pt[index_in_pt].paddr   = ppage >> 12;

  /* Increase the PT's occupation count because we allocated a new PTE
     inside it */
  sos_physmem_inc_physpage_occupation((sos_paddr_t)pt);

  return SOS_OK;
}


sos_ret_t sos_paging_subsystem_setup(sos_paddr_t identity_mapping_base,
                                     sos_paddr_t identity_mapping_top)
{
  /* The PDBR we will setup below */
  struct x86_pdbr cr3;

  /* Get the PD for the kernel */
  struct x86_pde * pd
    = (struct x86_pde*) sos_physmem_ref_physpage_new(FALSE);

  /* The iterator for scanning the kernel area */
  sos_paddr_t paddr;

  /* Reset the PD. For the moment, the whole RAM is still
     identity-mapped, so physical addresses can be used as virtual
     addresses */
  memset((void*)pd,
         0x0,
         SOS_PAGE_SIZE);

  /* Identity-map the identity_mapping_* area */
  for (paddr = identity_mapping_base ;
       paddr < identity_mapping_top ;
       paddr += SOS_PAGE_SIZE)
    {
      if (paging_setup_map_helper(pd, paddr, paddr))
        return -SOS_ENOMEM;
    }

  /* Identity-map the PC-specific BIOS/Video area */
  for (paddr = BIOS_N_VIDEO_START ;
       paddr < BIOS_N_VIDEO_END ;
       paddr += SOS_PAGE_SIZE)
    {
      if (paging_setup_map_helper(pd, paddr, paddr))
        return -SOS_ENOMEM;
    }

  /* Ok, kernel is now identity mapped in the PD. We still have to set
     up the mirroring */
  pd[virt_to_pd_index(SOS_PAGING_MIRROR_VADDR)].present = TRUE;
  pd[virt_to_pd_index(SOS_PAGING_MIRROR_VADDR)].write = 1;
  pd[virt_to_pd_index(SOS_PAGING_MIRROR_VADDR)].user  = 0;
  pd[virt_to_pd_index(SOS_PAGING_MIRROR_VADDR)].pt_paddr 
    = ((sos_paddr_t)pd)>>12;

  /* We now just have to configure the MMU to use our PD. See Intel
     x86 doc vol 3, section 3.6.3 */
  memset(& cr3, 0x0, sizeof(struct x86_pdbr)); /* Reset the PDBR */
  cr3.pd_paddr = ((sos_paddr_t)pd) >> 12;

  /* Actual loading of the PDBR in the MMU: setup cr3 + bits 31[Paging
     Enabled] and 16[Write Protect] of cr0, see Intel x86 doc vol 3,
     sections 2.5, 3.6.1 and 4.11.3 + note table 4-2 */
  asm volatile ("movl %0,%%cr3\n\t"
                "movl %%cr0,%%eax\n\t"
                "orl $0x80010000, %%eax\n\t" /* bit 31 | bit 16 */
                "movl %%eax,%%cr0\n\t"
                "jmp 1f\n\t"
                "1:\n\t"
                "movl $2f, %%eax\n\t"
                "jmp *%%eax\n\t"
                "2:\n\t" ::"r"(cr3):"memory","eax");

  /*
   * Here, the only memory available is:
   * - The BIOS+video area
   * - the identity_mapping_base .. identity_mapping_top area
   * - the PD mirroring area (4M)
   * All accesses to other virtual addresses will generate a #PF
   */

  return SOS_OK;
}
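
/*
 * Note on the mirroring trick (explanatory sketch, not from the original
 * sources): the PD was just installed as its own page table at PD index
 * virt_to_pd_index(SOS_PAGING_MIRROR_VADDR). As a consequence, once the
 * MMU uses this PD, the PD of the current address space is always
 * reachable at the virtual address
 *   SOS_PAGING_MIRROR_VADDR
 *     + SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR)
 * and the PT covering a virtual address V is reachable at
 *   SOS_PAGING_MIRROR_VADDR + SOS_PAGE_SIZE*virt_to_pd_index(V)
 * (provided the corresponding PDE is present). This is exactly how the
 * functions below compute their pd and pt pointers.
 */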


/* Assumes that the current address space is configured with the
 * mirroring enabled to access the PD and PTs. */
sos_ret_t sos_paging_map(sos_paddr_t ppage_paddr,
                         sos_vaddr_t vpage_vaddr,
                         sos_bool_t is_user_page,
                         sos_ui32_t flags)
{
  /* Get the page directory entry and table entry index for this
     address */
  unsigned index_in_pd = virt_to_pd_index(vpage_vaddr);
  unsigned index_in_pt = virt_to_pt_index(vpage_vaddr);
  
  /* Get the PD of the current context */
  struct x86_pde *pd = (struct x86_pde*)
    (SOS_PAGING_MIRROR_VADDR
     + SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));

  /* Address of the PT in the mirroring */
  struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
                                           + SOS_PAGE_SIZE*index_in_pd);

  SOS_ASSERT_FATAL(SOS_IS_PAGE_ALIGNED(ppage_paddr));
  SOS_ASSERT_FATAL(SOS_IS_PAGE_ALIGNED(vpage_vaddr));

  /* EXEC permission ignored on x86 */
  flags &= ~SOS_VM_MAP_PROT_EXEC;

  /* Mapping anything inside the PD mirroring area is FORBIDDEN ;) */
  if ((vpage_vaddr >= SOS_PAGING_MIRROR_VADDR)
      && (vpage_vaddr < SOS_PAGING_MIRROR_VADDR + SOS_PAGING_MIRROR_SIZE))
    return -SOS_EINVAL;

  /* Map a page for the PT if necessary */
  if (! pd[index_in_pd].present)
    {
      x86_pde_val_t u;
      
      /* No : allocate a new one */
      sos_paddr_t pt_ppage
        = sos_physmem_ref_physpage_new(! (flags & SOS_VM_MAP_ATOMIC));
      if (! pt_ppage)
        {
          return -SOS_ENOMEM;
        }

      /* Prepare the value of the PDE */
      u.pde = (struct x86_pde){
        .present  = TRUE,
        .write    = 1,
        .pt_paddr = ((sos_paddr_t)pt_ppage) >> 12
      };

      /* Is it a PDE concerning the kernel space ? */
      if (vpage_vaddr < SOS_PAGING_MIRROR_VADDR)
        {
          /* Yes: So we need to update the PDE of ALL the mm_contexts
             in the system */

          /* First of all: this is a kernel PT */
          u.pde.user = 0;

          /* Now synchronize all the PDs */
          SOS_ASSERT_FATAL(SOS_OK ==
                           sos_mm_context_synch_kernel_PDE(index_in_pd,
                                                           u.ui32));
        }
      else /* We should have written "else if (vpage_vaddr >=
              SOS_PAGING_BASE_USER_ADDRESS)", but this is not needed
              because the beginning of the function detects and
              rejects mapping requests inside the mirroring */
        {
          /* No: The request concerns the user space. So only the
             current MMU context is concerned */

          /* First of all: this is a user PT */
          u.pde.user = 1;

          /* Now update the current PD */
          pd[index_in_pd] = u.pde;
        }
      
      /*
       * The PT is now mapped in the PD mirroring
       */

      /* Invalidate TLB for the page we just added */
      invlpg(pt);
     
      /* Reset this new PT */
      memset((void*)pt, 0x0, SOS_PAGE_SIZE);
    }

  /* If we are allocating a new entry in the PT, increase its occupation
     count. */
  if (! pt[index_in_pt].present)
    sos_physmem_inc_physpage_occupation(pd[index_in_pd].pt_paddr << 12);
  
  /* Otherwise, a physical page is being implicitly
     unmapped */
  else
    sos_physmem_unref_physpage(pt[index_in_pt].paddr << 12);

  /* Map the page in the page table */
  pt[index_in_pt].present = TRUE;
  pt[index_in_pt].write   = (flags & SOS_VM_MAP_PROT_WRITE)?1:0;
  pt[index_in_pt].user    = (is_user_page)?1:0;
  pt[index_in_pt].paddr   = ppage_paddr >> 12;
  sos_physmem_ref_physpage_at(ppage_paddr);


  /*
   * The page is now mapped in the current address space
   */
  
  /* Invalidate TLB for the page we just added */
  invlpg(vpage_vaddr);

  return SOS_OK;
}
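
/*
 * Usage sketch (illustrative only, not part of the original sources): to
 * back some page-aligned virtual address vpage with a freshly allocated
 * physical page, a caller running with the mirroring set up could do
 * something like
 *
 *   sos_vaddr_t vpage = ...;  // hypothetical page-aligned address
 *   sos_paddr_t ppage = sos_physmem_ref_physpage_new(TRUE);
 *   if (ppage
 *       && (SOS_OK == sos_paging_map(ppage, vpage, FALSE,
 *                                    SOS_VM_MAP_PROT_READ
 *                                    | SOS_VM_MAP_PROT_WRITE)))
 *     sos_physmem_unref_physpage(ppage);
 *
 * The final unref is possible because sos_paging_map() above takes a
 * reference of its own through sos_physmem_ref_physpage_at(); whether a
 * given caller keeps or drops the allocation reference is up to it.
 */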


sos_ret_t sos_paging_unmap(sos_vaddr_t vpage_vaddr)
{
  sos_ret_t pt_dec_occupation_retval;

  /* Get the page directory entry and table entry index for this
     address */
  unsigned index_in_pd = virt_to_pd_index(vpage_vaddr);
  unsigned index_in_pt = virt_to_pt_index(vpage_vaddr);
  
  /* Get the PD of the current context */
  struct x86_pde *pd = (struct x86_pde*)
    (SOS_PAGING_MIRROR_VADDR
     + SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));

  /* Address of the PT in the mirroring */
  struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
                                           + SOS_PAGE_SIZE*index_in_pd);

  SOS_ASSERT_FATAL(SOS_IS_PAGE_ALIGNED(vpage_vaddr));

  /* No page mapped at this address ? */
  if (! pd[index_in_pd].present)
    return -SOS_EINVAL;
  if (! pt[index_in_pt].present)
    return -SOS_EINVAL;

  /* Unmapping anything inside the PD mirroring area is FORBIDDEN ;) */
  if ((vpage_vaddr >= SOS_PAGING_MIRROR_VADDR)
      && (vpage_vaddr < SOS_PAGING_MIRROR_VADDR + SOS_PAGING_MIRROR_SIZE))
    return -SOS_EINVAL;

  /* Reclaim the physical page */
  sos_physmem_unref_physpage(pt[index_in_pt].paddr << 12);

  /* Unmap the page in the page table */
  memset(pt + index_in_pt, 0x0, sizeof(struct x86_pte));

  /* Invalidate TLB for the page we just unmapped */
  invlpg(vpage_vaddr);

  /* Reclaim this entry in the PT, which may free the PT */
  pt_dec_occupation_retval
    = sos_physmem_dec_physpage_occupation(pd[index_in_pd].pt_paddr << 12);
  SOS_ASSERT_FATAL(pt_dec_occupation_retval >= 0);
  if (pt_dec_occupation_retval > 0)
    /* If the PT is now completely unused... */
    {
      x86_pde_val_t u;


      /*
       * The PT is not referenced by this PD anymore
       */
      sos_physmem_unref_physpage(pd[index_in_pd].pt_paddr << 12);


      /*
       * Reset the PDE
       */

      /* Mark the PDE as unavailable */
      u.ui32 = 0;

      /* Is it a PDE concerning the kernel space ? */
      if (vpage_vaddr < SOS_PAGING_MIRROR_VADDR)
        {
          /* Now synchronize all the PDs */
          SOS_ASSERT_FATAL(SOS_OK ==
                           sos_mm_context_synch_kernel_PDE(index_in_pd,
                                                           u.ui32));
        }
      else /* We should have written "else if (vpage_vaddr >=
              SOS_PAGING_BASE_USER_ADDRESS)", but this is not needed
              because the beginning of the function detects and
              rejects unmapping requests inside the mirroring */
        {
          /* No: The request concerns the user space. So only the
             current MMU context is concerned */
          pd[index_in_pd] = u.pde;
        }
      
      /* Update the TLB */
      invlpg(pt);
    }

  return SOS_OK;
}


sos_ret_t sos_paging_unmap_interval(sos_vaddr_t vaddr,
                                    sos_size_t  size)
{
  sos_ret_t retval = 0;

  if (! SOS_IS_PAGE_ALIGNED(vaddr))
    return -SOS_EINVAL;
  if (! SOS_IS_PAGE_ALIGNED(size))
    return -SOS_EINVAL;

  for ( ;
        size >= SOS_PAGE_SIZE ;
        vaddr += SOS_PAGE_SIZE, size -= SOS_PAGE_SIZE)
    if (SOS_OK == sos_paging_unmap(vaddr))
      retval += SOS_PAGE_SIZE;

  return retval;
}


sos_ui32_t sos_paging_get_prot(sos_vaddr_t vaddr)
{
  sos_ui32_t retval;

  /* Get the page directory entry and table entry index for this
     address */
  unsigned index_in_pd = virt_to_pd_index(vaddr);
  unsigned index_in_pt = virt_to_pt_index(vaddr);
  
  /* Get the PD of the current context */
  struct x86_pde *pd = (struct x86_pde*)
    (SOS_PAGING_MIRROR_VADDR
     + SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));

  /* Address of the PT in the mirroring */
  struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
                                           + SOS_PAGE_SIZE*index_in_pd);

  /* No page mapped at this address ? */
  if (! pd[index_in_pd].present)
    return SOS_VM_MAP_PROT_NONE;
  if (! pt[index_in_pt].present)
    return SOS_VM_MAP_PROT_NONE;
  
  /* On x86, the default access right of a present page is "read" */
  retval = SOS_VM_MAP_PROT_READ;
  if (pd[index_in_pd].write && pt[index_in_pt].write)
    retval |= SOS_VM_MAP_PROT_WRITE;

  return retval;
}


sos_ret_t sos_paging_set_prot(sos_vaddr_t vaddr,
                              sos_ui32_t  new_prot)
{
  /* Get the page directory entry and table entry index for this
     address */
  unsigned index_in_pd = virt_to_pd_index(vaddr);
  unsigned index_in_pt = virt_to_pt_index(vaddr);
  
  /* Get the PD of the current context */
  struct x86_pde *pd = (struct x86_pde*)
    (SOS_PAGING_MIRROR_VADDR
     + SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));

  /* Address of the PT in the mirroring */
  struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
                                           + SOS_PAGE_SIZE*index_in_pd);

  /* EXEC permission ignored on x86 */
  new_prot &= ~SOS_VM_MAP_PROT_EXEC;

  /* Check flags */
  if (new_prot & ~(SOS_VM_MAP_PROT_READ | SOS_VM_MAP_PROT_WRITE))
    return -SOS_EINVAL;
  if (! (new_prot & SOS_VM_MAP_PROT_READ))
    /* On x86, a present page is always at least readable */
    return -SOS_ENOSUP;

  /* No page mapped at this address ? */
  if (! pd[index_in_pd].present)
    return -SOS_EINVAL;
  if (! pt[index_in_pt].present)
    return -SOS_EINVAL;

  /* Update access rights */
  pt[index_in_pt].write = ((new_prot & SOS_VM_MAP_PROT_WRITE) != 0);
  invlpg(vaddr);

  return SOS_OK;
}


sos_ret_t sos_paging_set_prot_of_interval(sos_vaddr_t vaddr,
                                          sos_size_t  size,
                                          sos_ui32_t  new_prot)
{
  if (! SOS_IS_PAGE_ALIGNED(vaddr))
    return -SOS_EINVAL;
  if (! SOS_IS_PAGE_ALIGNED(size))
    return -SOS_EINVAL;

  for ( ; size >= SOS_PAGE_SIZE ; vaddr += SOS_PAGE_SIZE, size -= SOS_PAGE_SIZE)
    sos_paging_set_prot(vaddr, new_prot);

  return SOS_OK;
}


sos_paddr_t sos_paging_get_paddr(sos_vaddr_t vaddr)
{
  /* Get the page directory entry and table entry index for this
     address */
  unsigned index_in_pd = virt_to_pd_index(vaddr);
  unsigned index_in_pt = virt_to_pt_index(vaddr);
  unsigned offset_in_page = virt_to_page_offset(vaddr);
  
  /* Get the PD of the current context */
  struct x86_pde *pd = (struct x86_pde*)
    (SOS_PAGING_MIRROR_VADDR
     + SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));

  /* Address of the PT in the mirroring */
  struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
                                           + SOS_PAGE_SIZE*index_in_pd);

  /* No page mapped at this address ? */
  if (! pd[index_in_pd].present)
    return (sos_paddr_t)NULL;
  if (! pt[index_in_pt].present)
    return (sos_paddr_t)NULL;

  return (pt[index_in_pt].paddr << 12) + offset_in_page;
}


/* *************************************************
 * Functions restricted to mm_context module
 */


sos_paddr_t sos_paging_get_current_PD_paddr()
{
  struct x86_pdbr pdbr;
  asm volatile("movl %%cr3, %0\n": "=r"(pdbr));
  return (pdbr.pd_paddr << 12);
}


sos_ret_t sos_paging_set_current_PD_paddr(sos_paddr_t paddr_PD)
{
  struct x86_pdbr pdbr;

  SOS_ASSERT_FATAL(paddr_PD != 0);
  SOS_ASSERT_FATAL(SOS_IS_PAGE_ALIGNED(paddr_PD));

  /* Setup the value of the PDBR */
  memset(& pdbr, 0x0, sizeof(struct x86_pdbr)); /* Reset the PDBR */
  pdbr.pd_paddr = (paddr_PD >> 12);

  /* Configure the MMU according to the PDBR */
  asm volatile ("movl %0,%%cr3\n" ::"r"(pdbr));

  return SOS_OK;
}
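
/*
 * Usage sketch (illustrative, not from the original sources): the
 * mm_context module can implement an address-space switch on top of the
 * two functions above with something like
 *
 *   sos_paddr_t prev = sos_paging_get_current_PD_paddr();
 *   if (prev != next_PD_paddr)   // next_PD_paddr: hypothetical target PD
 *     sos_paging_set_current_PD_paddr(next_PD_paddr);
 *
 * where next_PD_paddr is the page-aligned physical address of the
 * destination PD. Reloading CR3 this way also flushes the non-global TLB
 * entries.
 */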


sos_ret_t sos_paging_dispose(sos_vaddr_t vaddr_PD)
{
  x86_pde_val_t *pd = (x86_pde_val_t*) vaddr_PD;
  x86_pte_val_t *pt;
  int           index_in_pd;

  /* Allocate 1 page in kernel space to map the PTs in order to
     unreference the physical pages they reference */
  pt = (x86_pte_val_t *)sos_kmem_vmm_alloc(1, 0);
  if (! pt)
    return -SOS_ENOMEM;

  /* (Nothing to do in kernel space) */

  /* Reset all the PTs in user space */
  for (index_in_pd = (SOS_PAGING_BASE_USER_ADDRESS >> 22) ;
       index_in_pd < 1024 ; /* 1 PDE = 1 PT
                               = 1024 Pages
                               = 4MB */
       index_in_pd ++)
    {
      sos_paddr_t paddr_pt = (pd[index_in_pd].pde.pt_paddr << 12);
      int index_in_pt;

      /* Nothing to do if there is no PT */
      if (! pd[index_in_pd].pde.present)
        {
          pd[index_in_pd].ui32 = 0;
          continue;
        }

      /* Map this PT inside kernel */
      SOS_ASSERT_FATAL(SOS_OK
                       == sos_paging_map(paddr_pt,
                                         (sos_vaddr_t)pt, FALSE,
                                         SOS_VM_MAP_PROT_READ
                                         | SOS_VM_MAP_PROT_WRITE));
      
      /* Reset all the mappings in this PT */
      for (index_in_pt = 0 ; index_in_pt < 1024 ; index_in_pt ++)
        {
          /* Ignore unmapped PTE */
          if (! pt[index_in_pt].pte.present)
            {
              pt[index_in_pt].ui32 = 0;
              continue;
            }

          /* Unreference the associated page */
          sos_physmem_unref_physpage(pt[index_in_pt].pte.paddr << 12);

          /* Decrease occupation count of the PT */
          sos_physmem_dec_physpage_occupation(paddr_pt);

          /* Reset PTE */
          pt[index_in_pt].ui32 = 0;
        }

      /* Unmap PT */
      SOS_ASSERT_FATAL(SOS_OK == sos_paging_unmap((sos_vaddr_t)pt));

      /* Reset PDE */
      pd[index_in_pd].ui32 = 0;

      /* Unreference PT */
      sos_physmem_unref_physpage(paddr_pt);
    }

  /* Free the kernel space used for the temporary PT */
  SOS_ASSERT_FATAL(SOS_OK == sos_kmem_vmm_free((sos_vaddr_t)pt));

  return SOS_OK;
}


sos_ret_t sos_paging_copy_kernel_space(sos_vaddr_t dest_vaddr_PD,
                                       sos_vaddr_t src_vaddr_PD)
{
  x86_pde_val_t *src_pd       = (x86_pde_val_t*) src_vaddr_PD;
  x86_pde_val_t *dest_pd      = (x86_pde_val_t*) dest_vaddr_PD;
  sos_paddr_t   dest_paddr_PD = sos_paging_get_paddr(dest_vaddr_PD);
  x86_pde_val_t mirror_pde;
  int           index_in_pd;

  /* Fill destination PD with zeros */
  memset((void*)dest_vaddr_PD, 0x0, SOS_PAGE_SIZE);

  /* Synchronize it with the master Kernel MMU context. Stop just
     before the mirroring ! */
  for (index_in_pd = 0 ;
       index_in_pd < (SOS_PAGING_MIRROR_VADDR >> 22) ; /* 1 PDE = 1 PT
                                                          = 1024 Pages
                                                          = 4MB */
       index_in_pd ++)
    {
      /* Copy the master's configuration */
      dest_pd[index_in_pd].ui32 = src_pd[index_in_pd].ui32;

      /* We DON'T mark the underlying PT and pages as referenced
         because all the PDs are equivalent in the kernel space: as
         soon as a page is mapped in the kernel, it is mapped by X
         address spaces, and as soon as it is unmapped by 1 address
         space, it is unmapped in all the others. So for X
         address spaces, the reference counter will be either 0 or X,
         and nothing else: maintaining it precisely would be of no use
         and would only cost time to keep updated. */
    }

  /* Setup the mirroring for the new address space */
  mirror_pde.ui32 = 0;
  mirror_pde.pde.present  = TRUE;
  mirror_pde.pde.write    = 1;
  mirror_pde.pde.user     = 0; /* This is a KERNEL PDE */
  mirror_pde.pde.pt_paddr = (dest_paddr_PD >> 12);
  dest_pd[SOS_PAGING_MIRROR_VADDR >> 22].ui32 = mirror_pde.ui32;

  return SOS_OK;
}


sos_ret_t sos_paging_copy_user_space(sos_vaddr_t dest_vaddr_PD,
                                     sos_vaddr_t src_vaddr_PD)
{
  x86_pde_val_t *src_pd  = (x86_pde_val_t*) src_vaddr_PD;
  x86_pde_val_t *dest_pd = (x86_pde_val_t*) dest_vaddr_PD;
  x86_pte_val_t *tmp_src_pt, *tmp_dest_pt;
  int           index_in_pd;

  /* Allocate 2 pages in kernel space to map the PTs while copying them
     from source to destination */
  tmp_src_pt  = (x86_pte_val_t *)sos_kmem_vmm_alloc(1, 0);
  if (! tmp_src_pt)
    return -SOS_ENOMEM;

  tmp_dest_pt = (x86_pte_val_t *)sos_kmem_vmm_alloc(1, 0);
  if (! tmp_dest_pt)
    {
      sos_kmem_vmm_free((sos_vaddr_t)tmp_src_pt);
      return -SOS_ENOMEM;
    }

  /* Copy each used PT from source to destination */
  for (index_in_pd = (SOS_PAGING_BASE_USER_ADDRESS >> 22) ;
       index_in_pd < 1024 ; /* 1 PDE = 1 PT
                               = 1024 Pages
                               = 4MB */
       index_in_pd ++)
    {
      sos_paddr_t paddr_dest_pt;
      int         index_in_pt;

      /* We first literally copy the source PDE into the destination
         PDE. However, please bear in mind that, in the end, both
         won't reference the same physical PT: the destination PDE
         will be updated (below) to match the address of its own new
         PT */
      dest_pd[index_in_pd].ui32 = src_pd[index_in_pd].ui32;

      /* Ignore unused PTs */
      if (! src_pd[index_in_pd].pde.present)
        continue;

      /* Allocate the destination PT */
      paddr_dest_pt = sos_physmem_ref_physpage_new(TRUE);
      if (NULL == (void*)paddr_dest_pt)
        {
          sos_paging_dispose((sos_vaddr_t)dest_vaddr_PD);
          
          /* Free the temporary kernel space used for the copy */
          sos_kmem_vmm_free((sos_vaddr_t)tmp_src_pt);
          sos_kmem_vmm_free((sos_vaddr_t)tmp_dest_pt);
          return -SOS_ENOMEM;
        }

      /* Map source and destination PT */
      SOS_ASSERT_FATAL(SOS_OK
                       == sos_paging_map(src_pd[index_in_pd].pde.pt_paddr << 12,
                                         (sos_vaddr_t)tmp_src_pt, FALSE,
                                         SOS_VM_MAP_PROT_READ));
      SOS_ASSERT_FATAL(SOS_OK
                       == sos_paging_map(paddr_dest_pt,
                                         (sos_vaddr_t)tmp_dest_pt, FALSE,
                                         SOS_VM_MAP_PROT_READ
                                         | SOS_VM_MAP_PROT_WRITE));

      /* Copy the contents of the source to the destination PT,
         updating the reference counts of the pages */
      for (index_in_pt = 0 ; index_in_pt < 1024 ; index_in_pt ++)
        {
          /* Copy the source PTE */
          tmp_dest_pt[index_in_pt].ui32 = tmp_src_pt[index_in_pt].ui32;
          
          /* Ignore non-present pages */
          if (! tmp_dest_pt[index_in_pt].pte.present)
            continue;

          /* Reset the dirty/accessed flags */
          tmp_dest_pt[index_in_pt].pte.accessed = 0;
          tmp_dest_pt[index_in_pt].pte.dirty    = 0;

          /* Increase the reference count of the destination page */
          sos_physmem_ref_physpage_at(tmp_src_pt[index_in_pt].pte.paddr << 12);

          /* Increase occupation count of the PT */
          sos_physmem_inc_physpage_occupation(paddr_dest_pt);
        }

      /* Unmap the temporary PTs */
      SOS_ASSERT_FATAL(SOS_OK == sos_paging_unmap((sos_vaddr_t)tmp_src_pt));
      SOS_ASSERT_FATAL(SOS_OK == sos_paging_unmap((sos_vaddr_t)tmp_dest_pt));

      /* Update the destination PDE */
      dest_pd[index_in_pd].pde.pt_paddr = (paddr_dest_pt >> 12);

      /* Reset the dirty/accessed flags */
      dest_pd[index_in_pd].pde.accessed = 0;
    }


  /* Free the temporary kernel space used for the copy */
  SOS_ASSERT_FATAL(SOS_OK == sos_kmem_vmm_free((sos_vaddr_t)tmp_src_pt));
  SOS_ASSERT_FATAL(SOS_OK == sos_kmem_vmm_free((sos_vaddr_t)tmp_dest_pt));

  return SOS_OK;
}


sos_ret_t sos_paging_prepare_COW(sos_uaddr_t base_address,
                                 sos_size_t length)
{
  SOS_ASSERT_FATAL(SOS_IS_PAGE_ALIGNED(base_address));
  SOS_ASSERT_FATAL(SOS_IS_PAGE_ALIGNED(length));
  SOS_ASSERT_FATAL(SOS_PAGING_BASE_USER_ADDRESS <= base_address);

  /* Mark all the pages read-only, when already mapped in physical
     memory */
  for ( ;
       length > 0 ;
       length -= SOS_PAGE_SIZE, base_address += SOS_PAGE_SIZE)
    {
      sos_paging_set_prot(base_address,
                          SOS_VM_MAP_PROT_READ);
    }

  return SOS_OK;
}
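
/*
 * How the two COW functions fit together (explanatory sketch, not from
 * the original sources): after a fork-like operation the parent and the
 * child share the same physical pages, and sos_paging_prepare_COW()
 * downgrades the shared user range to read-only. A later write access
 * then raises a page fault, and the fault handler is expected to attempt
 * something like
 *
 *   if (SOS_OK == sos_paging_try_resolve_COW(faulting_uaddr))
 *     return;   // resolved: the faulting instruction can be retried
 *   // otherwise: genuine protection fault
 *
 * where faulting_uaddr is the (hypothetical) user address read from CR2.
 */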


sos_ret_t sos_paging_try_resolve_COW(sos_uaddr_t uaddr)
{
  sos_ret_t refcnt;

  /* Get the page directory entry and table entry index for this
     address */
  unsigned index_in_pd = virt_to_pd_index(uaddr);
  unsigned index_in_pt = virt_to_pt_index(uaddr);
  
  /* Get the PD of the current context */
  struct x86_pde *pd = (struct x86_pde*)
    (SOS_PAGING_MIRROR_VADDR
     + SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));

  /* Address of the PT in the mirroring */
  struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
                                           + SOS_PAGE_SIZE*index_in_pd);

  /* No page mapped at this address ? */
  if (! pd[index_in_pd].present)
    return -SOS_EFAULT;
  if (! pt[index_in_pt].present)
    return -SOS_EFAULT;
  
  /* Read-only PT not supported by kernel ! */
  if (! pd[index_in_pd].write)
    return -SOS_EFAULT;

  /* Cannot understand a COW request if the page is already
     read/write */
  SOS_ASSERT_FATAL(! pt[index_in_pt].write);

  /* We do a private copy of the page only if the current mapped page
     is shared by more than 1 process */
  refcnt = sos_physmem_get_physpage_refcount(pt[index_in_pt].paddr << 12);
  SOS_ASSERT_FATAL(refcnt > 0);

  if (refcnt == 1)
    {
      /* We are the only address space to reference this page, we can
         safely turn it read/write now */
      pt[index_in_pt].write = 1;
      invlpg(uaddr);
    }

  /* Otherwise we need to make a private copy of the page */
  else
    {
      sos_paddr_t new_ppage;
      sos_vaddr_t vpage_src, tmp_dest;

      /* For that, we allocate the destination page inside the kernel
         space to perform the copy. We will transfer it to its
         final user-space address later */
      tmp_dest = sos_kmem_vmm_alloc(1, SOS_KMEM_VMM_MAP);
      if (! tmp_dest)
        return -SOS_ENOMEM;

      /* copy the contents of the page */
      vpage_src = SOS_PAGE_ALIGN_INF(uaddr);
      memcpy((void*)tmp_dest, (void*)vpage_src, SOS_PAGE_SIZE);

      /* replace the original (read-only) mapping with a (read/write)
         mapping to the new page. This will automatically unreference
         the original page */
      new_ppage = sos_paging_get_paddr(tmp_dest);
      SOS_ASSERT_FATAL(new_ppage != (sos_paddr_t)NULL);
      if (SOS_OK != sos_paging_map(new_ppage, vpage_src,
                                   TRUE,
                                   SOS_VM_MAP_PROT_READ
                                   | SOS_VM_MAP_PROT_WRITE))
        {
          sos_kmem_vmm_free(tmp_dest);
          return -SOS_ENOMEM;
        }

      /* We can now unmap the destination page from inside the
         kernel and free the kernel VM range for it */
      SOS_ASSERT_FATAL(SOS_OK == sos_kmem_vmm_free(tmp_dest));
    }

  /* That's all, folks ! */
  return SOS_OK;
}
