SimpleOS

The LXR Cross Referencer for SOS

001 /* Copyright (C) 2004  David Decotigny
002 
003    This program is free software; you can redistribute it and/or
004    modify it under the terms of the GNU General Public License
005    as published by the Free Software Foundation; either version 2
006    of the License, or (at your option) any later version.
007    
008    This program is distributed in the hope that it will be useful,
009    but WITHOUT ANY WARRANTY; without even the implied warranty of
010    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
011    GNU General Public License for more details.
012    
013    You should have received a copy of the GNU General Public License
014    along with this program; if not, write to the Free Software
015    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
016    USA. 
017 */
018 #include <sos/physmem.h>
019 #include <sos/klibc.h>
020 #include <sos/assert.h>
021 
022 #include "mm_context.h"
023 
024 #include "paging.h"
025 
026 
027 /*
028  * Important NOTICE concerning the use of the reference & occupation
029  * counters of the physical pages by the "paging" subsystem:
030  *   - All the kernel PTs are SHARED. This means that as soon as one
031  *     kernel PT belongs to one mm_context, it belongs to ALL the
032  *     mm_contexts. We don't update the real reference count of the PTs
033  *     in this respect, because that would require updating the
034  *     reference counts of ALL the kernel PTs as soon as a new
035  *     mm_context is created, or as soon as an mm_context is
036  *     destroyed. This way, the reference count stays constant,
037  *     independent of the actual number of PDs really sharing them.
038  *   - We do NOT maintain the occupation count of the PDs. This would
039  *     only add useless overhead
040  *   - We do maintain the occupation count of ALL the PTs: it represents
041  *     the number of PTEs allocated in the PT
042  */
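/*
 * For instance, consider two user pages that fall into the same 4MB
 * region and are mapped with sos_paging_map() below: the first call
 * allocates the PT and raises its occupation count from 0 to 1, and
 * the second call reuses the same PT and raises the count to 2.
 * Unmapping both pages with sos_paging_unmap() brings the count back
 * to 1, then to 0, at which point the PT itself is unreferenced and
 * its physical page may be reclaimed.
 */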
043 
044 
045 /** The structure of a page directory entry. See Intel vol 3 section
046     3.6.4 */
047 struct x86_pde
048 {
049   sos_ui32_t present        :1; /* 1=PT mapped */
050   sos_ui32_t write          :1; /* 0=read-only, 1=read/write */
051   sos_ui32_t user           :1; /* 0=supervisor, 1=user */
052   sos_ui32_t write_through  :1; /* 0=write-back, 1=write-through */
053   sos_ui32_t cache_disabled :1; /* 1=cache disabled */
054   sos_ui32_t accessed       :1; /* 1=read/write access since last clear */
055   sos_ui32_t zero           :1; /* Intel reserved */
056   sos_ui32_t page_size      :1; /* 0=4kB, 1=4MB or 2MB (depending on PAE) */
057   sos_ui32_t global_page    :1; /* Ignored (Intel reserved) */
058   sos_ui32_t custom         :3; /* Do what you want with them */
059   sos_ui32_t pt_paddr       :20;
060 } __attribute__ ((packed));
061 
062 
063 /** Intermediate type to speed up PDE copy */
064 typedef union {
065   struct x86_pde pde;
066   sos_ui32_t     ui32;
067 } x86_pde_val_t;
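/*
 * A worked example of this union, assuming GCC's usual i386 bit-field
 * layout (fields allocated starting from the least significant bit,
 * which the code below relies on): storing the raw value 0x00001003
 * into the ui32 member yields pde.present = 1 (bit 0), pde.write = 1
 * (bit 1), every other flag bit clear, and pde.pt_paddr = 0x00001
 * (bits 12-31), i.e. a read/write supervisor PDE pointing to a page
 * table at physical address 0x1000.
 */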
068 
069 
070 /** The structure of a page table entry. See Intel vol 3 section
071     3.6.4 */
072 struct x86_pte
073 {
074   sos_ui32_t present        :1; /* 1=page mapped */
075   sos_ui32_t write          :1; /* 0=read-only, 1=read/write */
076   sos_ui32_t user           :1; /* 0=supervisor, 1=user */
077   sos_ui32_t write_through  :1; /* 0=write-back, 1=write-through */
078   sos_ui32_t cache_disabled :1; /* 1=cache disabled */
079   sos_ui32_t accessed       :1; /* 1=read/write access since last clear */
080   sos_ui32_t dirty          :1; /* 1=write access since last clear */
081   sos_ui32_t zero           :1; /* Intel reserved */
082   sos_ui32_t global_page    :1; /* 1=No TLB invalidation upon cr3 switch
083                                    (when PGE is set in CR4) */
084   sos_ui32_t custom         :3; /* Do what you want with them */
085   sos_ui32_t paddr          :20;
086 } __attribute__ ((packed));
087 
088 
089 /** Intermediate type to speed up PTE copy */
090 typedef union {
091   struct x86_pte pte;
092   sos_ui32_t     ui32;
093 } x86_pte_val_t;
094 
095 
096 /** Structure of the x86 CR3 register: the Page Directory Base
097     Register. See Intel x86 doc Vol 3 section 2.5 */
098 struct x86_pdbr
099 {
100   sos_ui32_t zero1          :3; /* Intel reserved */
101   sos_ui32_t write_through  :1; /* 0=write-back, 1=write-through */
102   sos_ui32_t cache_disabled :1; /* 1=cache disabled */
103   sos_ui32_t zero2          :7; /* Intel reserved */
104   sos_ui32_t pd_paddr       :20;
105 } __attribute__ ((packed));
106 
107 
108 /**
109  * Helper macro to control the MMU: invalidate the TLB entry for the
110  * page located at the given virtual address. See Intel x86 vol 3
111  * section 3.7.
112  */
113 #define invlpg(vaddr) \
114   do { \
115        __asm__ __volatile__("invlpg %0"::"m"(*((unsigned *)(vaddr)))); \
116   } while(0)
117 
118 
119 /**
120  * Helper macro to control the MMU: invalidate the whole TLB. See
121  * Intel x86 vol 3 section 3.7.
122  */
123 #define flush_tlb() \
124   do { \
125         unsigned long tmpreg; \
126         asm volatile("movl %%cr3,%0\n\tmovl %0,%%cr3" :"=r" \
127                      (tmpreg) : :"memory"); \
128   } while (0)
129 
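/*
 * Reloading CR3 with its own value, as done above, is the classical
 * ia32 way of flushing all non-global TLB entries. Global entries
 * (enabled through CR4.PGE) would survive such a reload, but nothing
 * in this file ever sets the global_page bit, so this is sufficient
 * here.
 */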
130 
131 /**
132  * Helper macro to compute the index in the PD for the given virtual
133  * address
134  */
135 #define virt_to_pd_index(vaddr) \
136   (((unsigned)(vaddr)) >> 22)
137 
138 
139 /**
140  * Helper macro to compute the index in the PT for the given virtual
141  * address
142  */
143 #define virt_to_pt_index(vaddr) \
144   ( (((unsigned)(vaddr)) >> 12) & 0x3ff )
145 
146 
147 /**
148  * Helper macro to compute the offset in the page for the given virtual
149  * address
150  */
151 #define virt_to_page_offset(vaddr) \
152   (((unsigned)(vaddr)) & SOS_PAGE_MASK)
153 
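/*
 * A worked example of the three macros above: for vaddr 0x00403025,
 * the top 10 bits give virt_to_pd_index() = 1, the next 10 bits give
 * virt_to_pt_index() = 3, and the low 12 bits give
 * virt_to_page_offset() = 0x025 (assuming SOS_PAGE_MASK is the
 * low-12-bit mask 0xfff, as the offset computation implies). That page
 * is thus described by PTE 3 of the PT referenced by PDE 1.
 */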
154 
155 /**
156  * Helper function to map a page in the pd.\ Assumes that RAM is
157  * identity-mapped, so that the actual (CPU) address of a PT can be
158  * resolved from the PD entry
159  */
160 static sos_ret_t paging_setup_map_helper(struct x86_pde * pd,
161                                          sos_paddr_t ppage,
162                                          sos_vaddr_t vaddr)
163 {
164   /* Get the page directory entry and table entry index for this
165      address */
166   unsigned index_in_pd = virt_to_pd_index(vaddr);
167   unsigned index_in_pt = virt_to_pt_index(vaddr);
168 
169   /* Make sure the page table was mapped */
170   struct x86_pte * pt;
171   if (pd[index_in_pd].present)
172     {
173       pt = (struct x86_pte*) (pd[index_in_pd].pt_paddr << 12);
174 
175       /* This test should never succeed here, since the setup routine
176          scans the kernel pages in a strictly increasing order: at
177          each step, the mapping results in the allocation of a brand-new
178          PT entry. We keep the test anyway, for the sake of clarity. */
179       if (pt[index_in_pt].present)
180         SOS_ASSERT_FATAL(FALSE); /* indicate a fatal error */
181     }
182   else
183     {
184       /* No: allocate a new one */
185       pt = (struct x86_pte*) sos_physmem_ref_physpage_new(FALSE);
186       if (! pt)
187         return -SOS_ENOMEM;
188       
189       memset((void*)pt, 0x0, SOS_PAGE_SIZE);
190 
191       pd[index_in_pd].present  = TRUE;
192       pd[index_in_pd].write    = 1; /* It would be too complicated to
193                                        determine whether it
194                                        corresponds to a real R/W area
195                                        of the kernel code/data or
196                                        read-only */
197       pd[index_in_pd].pt_paddr = ((sos_paddr_t)pt) >> 12;
198     }
199 
200   
201   /* Map the page in the page table */
202   pt[index_in_pt].present = 1;
203   pt[index_in_pt].write   = 1;  /* It would be too complicated to
204                                    determine whether it corresponds to
205                                    a real R/W area of the kernel
206                                    code/data or read-only */
207   pt[index_in_pt].user    = 0;
208   pt[index_in_pt].paddr   = ppage >> 12;
209 
210   /* Increase the PT's occupation count because we allocated a new PTE
211      inside it */
212   sos_physmem_inc_physpage_occupation((sos_paddr_t)pt);
213 
214   return SOS_OK;
215 }
216 
217 
218 sos_ret_t sos_paging_subsystem_setup(sos_paddr_t identity_mapping_base,
219                                      sos_paddr_t identity_mapping_top)
220 {
221   /* The PDBR we will setup below */
222   struct x86_pdbr cr3;  
223 
224   /* Get the PD for the kernel */
225   struct x86_pde * pd
226     = (struct x86_pde*) sos_physmem_ref_physpage_new(FALSE);
227 
228   /* The iterator for scanning the kernel area */
229   sos_paddr_t paddr;
230 
231   /* Reset the PD. For the moment, the whole RAM is still
232      identity-mapped, so that paddrs are also vaddrs */
233   memset((void*)pd,
234          0x0,
235          SOS_PAGE_SIZE);
236 
237   /* Identity-map the identity_mapping_* area */
238   for (paddr = identity_mapping_base ;
239        paddr < identity_mapping_top ;
240        paddr += SOS_PAGE_SIZE)
241     {
242       if (paging_setup_map_helper(pd, paddr, paddr))
243         return -SOS_ENOMEM;
244     }
245 
246   /* Identity-map the PC-specific BIOS/Video area */
247   for (paddr = BIOS_N_VIDEO_START ;
248        paddr < BIOS_N_VIDEO_END ;
249        paddr += SOS_PAGE_SIZE)
250     {
251       if (paging_setup_map_helper(pd, paddr, paddr))
252         return -SOS_ENOMEM;
253     }
254 
255   /* Ok, kernel is now identity mapped in the PD. We still have to set
256      up the mirroring */
257   pd[virt_to_pd_index(SOS_PAGING_MIRROR_VADDR)].present = TRUE;
258   pd[virt_to_pd_index(SOS_PAGING_MIRROR_VADDR)].write = 1;
259   pd[virt_to_pd_index(SOS_PAGING_MIRROR_VADDR)].user  = 0;
260   pd[virt_to_pd_index(SOS_PAGING_MIRROR_VADDR)].pt_paddr 
261     = ((sos_paddr_t)pd)>>12;
262 
263   /* We now just have to configure the MMU to use our PD. See Intel
264      x86 doc vol 3, section 3.6.3 */
265   memset(& cr3, 0x0, sizeof(struct x86_pdbr)); /* Reset the PDBR */
266   cr3.pd_paddr = ((sos_paddr_t)pd) >> 12;
267 
268  /* Actual loading of the PDBR in the MMU: setup cr3 + bits 31[Paging
269     Enabled] and 16[Write Protect] of cr0, see Intel x86 doc vol 3,
270     sections 2.5, 3.6.1 and 4.11.3 + note table 4-2 */
271   asm volatile ("movl %0,%%cr3\n\t"
272                 "movl %%cr0,%%eax\n\t"
273                 "orl $0x80010000, %%eax\n\t" /* bit 31 | bit 16 */
274                 "movl %%eax,%%cr0\n\t"
275                 "jmp 1f\n\t"
276                 "1:\n\t"
277                 "movl $2f, %%eax\n\t"
278                 "jmp *%%eax\n\t"
279                 "2:\n\t" ::"r"(cr3):"memory","eax");
280 
281   /*
282    * Here, the only memory available is:
283    * - The BIOS+video area
284    * - the identity_mapping_base .. identity_mapping_top area
285    * - the PD mirroring area (4M)
286    * All accesses to other virtual addresses will generate a #PF
287    */
288 
289   return SOS_OK;
290 }
291 
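/*
 * A short sketch of the "mirroring" configured above, since every
 * function below relies on it: the PD's own physical page is installed
 * as the PDE covering SOS_PAGING_MIRROR_VADDR, so the MMU interprets
 * the PD as if it were a PT for that 4MB window. As a result, the PT
 * referenced by PDE number i becomes visible at virtual address
 * SOS_PAGING_MIRROR_VADDR + i*SOS_PAGE_SIZE, and the PD itself appears
 * at SOS_PAGING_MIRROR_VADDR
 * + SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR). This is
 * exactly how the pd and pt pointers are computed in the functions
 * below.
 */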
292 
293 /* Assumes that the current address space is configured with the
294  * mirroring enabled, in order to access the PD and the PTs. */
295 sos_ret_t sos_paging_map(sos_paddr_t ppage_paddr,
296                          sos_vaddr_t vpage_vaddr,
297                          sos_bool_t is_user_page,
298                          sos_ui32_t flags)
299 {
300   /* Get the page directory entry and table entry index for this
301      address */
302   unsigned index_in_pd = virt_to_pd_index(vpage_vaddr);
303   unsigned index_in_pt = virt_to_pt_index(vpage_vaddr);
304   
305   /* Get the PD of the current context */
306   struct x86_pde *pd = (struct x86_pde*)
307     (SOS_PAGING_MIRROR_VADDR
308      + SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));
309 
310   /* Address of the PT in the mirroring */
311   struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
312                                            + SOS_PAGE_SIZE*index_in_pd);
313 
314   SOS_ASSERT_FATAL(SOS_IS_PAGE_ALIGNED(ppage_paddr));
315   SOS_ASSERT_FATAL(SOS_IS_PAGE_ALIGNED(vpage_vaddr));
316 
317   /* EXEC permission ignored on x86 */
318   flags &= ~SOS_VM_MAP_PROT_EXEC;
319 
320   /* The mapping of anywhere in the PD mirroring is FORBIDDEN ;) */
321   if ((vpage_vaddr >= SOS_PAGING_MIRROR_VADDR)
322       && (vpage_vaddr < SOS_PAGING_MIRROR_VADDR + SOS_PAGING_MIRROR_SIZE))
323     return -SOS_EINVAL;
324 
325   /* Map a page for the PT if necessary */
326   if (! pd[index_in_pd].present)
327     {
328       x86_pde_val_t u;
329       
330       /* No: allocate a new one */
331       sos_paddr_t pt_ppage
332         = sos_physmem_ref_physpage_new(! (flags & SOS_VM_MAP_ATOMIC));
333       if (! pt_ppage)
334         {
335           return -SOS_ENOMEM;
336         }
337 
338       /* Prepare the value of the PDE */
339       u.pde = (struct x86_pde){
340         .present  = TRUE,
341         .write    = 1,
342         .pt_paddr = ((sos_paddr_t)pt_ppage) >> 12
343       };
344 
345       /* Is it a PDE concerning the kernel space */
346       if (vpage_vaddr < SOS_PAGING_MIRROR_VADDR)
347         {
348           /* Yes: So we need to update the PDE of ALL the mm_contexts
349              in the system */
350 
351           /* First of all: this is a kernel PT */
352           u.pde.user = 0;
353 
354           /* Now synchronize all the PD */
355           SOS_ASSERT_FATAL(SOS_OK ==
356                            sos_mm_context_synch_kernel_PDE(index_in_pd,
357                                                            u.ui32));
358         }
359       else /* We should have written "else if (vpage_vaddr >=
360               SOS_PAGING_BASE_USER_ADDRESS)" but this is not needed
361               because the beginning of the function detects and
362               rejects mapping requests inside the mirroring */
363         {
364           /* No: The request concerns the user space. So only the
365              current MMU context is concerned */
366 
367           /* First of all: this is a user PT */
368           u.pde.user = 1;
369 
370           /* Now update the current PD */
371           pd[index_in_pd] = u.pde;
372         }
373       
374       /*
375        * The PT is now mapped in the PD mirroring
376        */
377 
378       /* Invalidate TLB for the page we just added */
379       invlpg(pt);
380      
381       /* Reset this new PT */
382       memset((void*)pt, 0x0, SOS_PAGE_SIZE);
383     }
384 
385   /* If we allocate a new entry in the PT, increase its occupation
386      count. */
387   if (! pt[index_in_pt].present)
388     sos_physmem_inc_physpage_occupation(pd[index_in_pd].pt_paddr << 12);
389   
390   /* Otherwise, that means that a physical page is implicitly
391      unmapped */
392   else
393     sos_physmem_unref_physpage(pt[index_in_pt].paddr << 12);
394 
395   /* Map the page in the page table */
396   pt[index_in_pt].present = TRUE;
397   pt[index_in_pt].write   = (flags & SOS_VM_MAP_PROT_WRITE)?1:0;
398   pt[index_in_pt].user    = (is_user_page)?1:0;
399   pt[index_in_pt].paddr   = ppage_paddr >> 12;
400   sos_physmem_ref_physpage_at(ppage_paddr);
401 
402 
403   /*
404    * The page is now mapped in the current address space
405    */
406   
407   /* Invalidate TLB for the page we just added */
408   invlpg(vpage_vaddr);
409 
410   return SOS_OK;
411 }
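/*
 * A typical calling sequence for sos_paging_map() above (a sketch
 * only; some_vaddr stands for a page-aligned virtual address chosen by
 * the caller):
 *
 *   sos_paddr_t ppage = sos_physmem_ref_physpage_new(TRUE);
 *   if (ppage != (sos_paddr_t)NULL)
 *     {
 *       sos_paging_map(ppage, some_vaddr, FALSE,
 *                      SOS_VM_MAP_PROT_READ | SOS_VM_MAP_PROT_WRITE);
 *       // sos_paging_map() took its own reference on the page (see
 *       // sos_physmem_ref_physpage_at above), so the caller may drop
 *       // its initial reference whether or not the call succeeded
 *       sos_physmem_unref_physpage(ppage);
 *     }
 */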
412 
413 
414 sos_ret_t sos_paging_unmap(sos_vaddr_t vpage_vaddr)
415 {
416   sos_ret_t pt_dec_occupation_retval;
417 
418   /* Get the page directory entry and table entry index for this
419      address */
420   unsigned index_in_pd = virt_to_pd_index(vpage_vaddr);
421   unsigned index_in_pt = virt_to_pt_index(vpage_vaddr);
422   
423   /* Get the PD of the current context */
424   struct x86_pde *pd = (struct x86_pde*)
425     (SOS_PAGING_MIRROR_VADDR
426      + SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));
427 
428   /* Address of the PT in the mirroring */
429   struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
430                                            + SOS_PAGE_SIZE*index_in_pd);
431 
432   SOS_ASSERT_FATAL(SOS_IS_PAGE_ALIGNED(vpage_vaddr));
433 
434   /* No page mapped at this address ? */
435   if (! pd[index_in_pd].present)
436     return -SOS_EINVAL;
437   if (! pt[index_in_pt].present)
438     return -SOS_EINVAL;
439 
440   /* The unmapping of anywhere in the PD mirroring is FORBIDDEN ;) */
441   if ((vpage_vaddr >= SOS_PAGING_MIRROR_VADDR)
442       && (vpage_vaddr < SOS_PAGING_MIRROR_VADDR + SOS_PAGING_MIRROR_SIZE))
443     return -SOS_EINVAL;
444 
445   /* Reclaim the physical page */
446   sos_physmem_unref_physpage(pt[index_in_pt].paddr << 12);
447 
448   /* Unmap the page in the page table */
449   memset(pt + index_in_pt, 0x0, sizeof(struct x86_pte));
450 
451   /* Invalidate TLB for the page we just unmapped */
452   invlpg(vpage_vaddr);
453 
454   /* Reclaim this entry in the PT, which may free the PT */
455   pt_dec_occupation_retval
456     = sos_physmem_dec_physpage_occupation(pd[index_in_pd].pt_paddr << 12);
457   SOS_ASSERT_FATAL(pt_dec_occupation_retval >= 0);
458   if (pt_dec_occupation_retval > 0)
459     /* If the PT is now completely unused... */
460     {
461       x86_pde_val_t u;
462 
463 
464       /*
465        * The PT is not referenced by this PD anymore
466        */
467       sos_physmem_unref_physpage(pd[index_in_pd].pt_paddr << 12);
468 
469 
470       /*
471        * Reset the PDE
472        */
473 
474       /* Mark the PDE as unavailable */
475       u.ui32 = 0;
476 
477       /* Is it a PDE concerning the kernel space */
478       if (vpage_vaddr < SOS_PAGING_MIRROR_VADDR)
479         {
480           /* Now synchronize all the PD */
481           SOS_ASSERT_FATAL(SOS_OK ==
482                            sos_mm_context_synch_kernel_PDE(index_in_pd,
483                                                            u.ui32));
484         }
485       else /* We should have written "else if (vpage_vaddr >=
486               SOS_PAGING_BASE_USER_ADDRESS)" but this is not needed
487               because the beginning of the function detects and
488               rejects unmapping requests inside the mirroring */
489         {
490           /* No: The request concerns the user space. So only the
491              current MMU context is concerned */
492           pd[index_in_pd] = u.pde;
493         }
494       
495       /* Update the TLB */
496       invlpg(pt);
497     }
498 
499   return SOS_OK;  
500 }
501 
502 
503 sos_ret_t sos_paging_unmap_interval(sos_vaddr_t vaddr,
504                                     sos_size_t  size)
505 {
506   sos_ret_t retval = 0;
507 
508   if (! SOS_IS_PAGE_ALIGNED(vaddr))
509     return -SOS_EINVAL;
510   if (! SOS_IS_PAGE_ALIGNED(size))
511     return -SOS_EINVAL;
512 
513   for ( ;
514         size >= SOS_PAGE_SIZE ;
515         vaddr += SOS_PAGE_SIZE, size -= SOS_PAGE_SIZE)
516     if (SOS_OK == sos_paging_unmap(vaddr))
517       retval += SOS_PAGE_SIZE;
518 
519   return retval;
520 }
521 
522 
523 sos_ui32_t sos_paging_get_prot(sos_vaddr_t vaddr)
524 {
525   sos_ui32_t retval;
526 
527   /* Get the page directory entry and table entry index for this
528      address */
529   unsigned index_in_pd = virt_to_pd_index(vaddr);
530   unsigned index_in_pt = virt_to_pt_index(vaddr);
531   
532   /* Get the PD of the current context */
533   struct x86_pde *pd = (struct x86_pde*)
534     (SOS_PAGING_MIRROR_VADDR
535      + SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));
536 
537   /* Address of the PT in the mirroring */
538   struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
539                                            + SOS_PAGE_SIZE*index_in_pd);
540 
541   /* No page mapped at this address ? */
542   if (! pd[index_in_pd].present)
543     return SOS_VM_MAP_PROT_NONE;
544   if (! pt[index_in_pt].present)
545     return SOS_VM_MAP_PROT_NONE;
546   
547   /* The default access right of a mapped page is "read" on x86 */
548   retval = SOS_VM_MAP_PROT_READ;
549   if (pd[index_in_pd].write && pt[index_in_pt].write)
550     retval |= SOS_VM_MAP_PROT_WRITE;
551 
552   return retval;
553 }
554 
555 
556 sos_ret_t sos_paging_set_prot(sos_vaddr_t vaddr,
557                               sos_ui32_t  new_prot)
558 {
559   /* Get the page directory entry and table entry index for this
560      address */
561   unsigned index_in_pd = virt_to_pd_index(vaddr);
562   unsigned index_in_pt = virt_to_pt_index(vaddr);
563   
564   /* Get the PD of the current context */
565   struct x86_pde *pd = (struct x86_pde*)
566     (SOS_PAGING_MIRROR_VADDR
567      + SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));
568 
569   /* Address of the PT in the mirroring */
570   struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
571                                            + SOS_PAGE_SIZE*index_in_pd);
572 
573   /* EXEC permission ignored on x86 */
574   new_prot &= ~SOS_VM_MAP_PROT_EXEC;
575 
576   /* Check flags */
577   if (new_prot & ~(SOS_VM_MAP_PROT_READ | SOS_VM_MAP_PROT_WRITE))
578     return -SOS_EINVAL;
579   if (! (new_prot & SOS_VM_MAP_PROT_READ))
580     /* On x86, the READ right is always granted to a mapped page */
581     return -SOS_ENOSUP;
582 
583   /* No page mapped at this address ? */
584   if (! pd[index_in_pd].present)
585     return -SOS_EINVAL;
586   if (! pt[index_in_pt].present)
587     return -SOS_EINVAL;
588 
589   /* Update access rights */
590   pt[index_in_pt].write = ((new_prot & SOS_VM_MAP_PROT_WRITE) != 0);
591   invlpg(vaddr);
592 
593   return SOS_OK;
594 }
595 
596 
597 sos_ret_t sos_paging_set_prot_of_interval(sos_vaddr_t vaddr,
598                                           sos_size_t  size,
599                                           sos_ui32_t  new_prot)
600 {
601   if (! SOS_IS_PAGE_ALIGNED(vaddr))
602     return -SOS_EINVAL;
603   if (! SOS_IS_PAGE_ALIGNED(size))
604     return -SOS_EINVAL;
605 
606   for ( ; size >= SOS_PAGE_SIZE ; vaddr += SOS_PAGE_SIZE, size -= SOS_PAGE_SIZE)
607     sos_paging_set_prot(vaddr, new_prot);
608 
609   return SOS_OK;
610 }
611 
612 
613 sos_bool_t sos_paging_is_dirty(sos_vaddr_t vaddr)
614 {
615   /* Get the page directory entry and table entry index for this
616      address */
617   unsigned index_in_pd = virt_to_pd_index(vaddr);
618   unsigned index_in_pt = virt_to_pt_index(vaddr);
619   
620   /* Get the PD of the current context */
621   struct x86_pde *pd = (struct x86_pde*)
622     (SOS_PAGING_MIRROR_VADDR
623      + SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));
624 
625   /* Address of the PT in the mirroring */
626   struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
627                                            + SOS_PAGE_SIZE*index_in_pd);
628 
629   /* No page mapped at this address ? */
630   if (! pd[index_in_pd].present)
631     return FALSE;
632   if (! pt[index_in_pt].present)
633     return FALSE;
634 
635   return (pt[index_in_pt].dirty != 0);
636 }
637 
638 
639 sos_ret_t sos_paging_set_dirty(sos_vaddr_t vaddr,
640                                sos_bool_t is_dirty)
641 {
642   /* Get the page directory entry and table entry index for this
643      address */
644   unsigned index_in_pd = virt_to_pd_index(vaddr);
645   unsigned index_in_pt = virt_to_pt_index(vaddr);
646   
647   /* Get the PD of the current context */
648   struct x86_pde *pd = (struct x86_pde*)
649     (SOS_PAGING_MIRROR_VADDR
650      + SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));
651 
652   /* Address of the PT in the mirroring */
653   struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
654                                            + SOS_PAGE_SIZE*index_in_pd);
655 
656   /* No page mapped at this address ? */
657   if (! pd[index_in_pd].present)
658     return -SOS_EFAULT;
659   if (! pt[index_in_pt].present)
660     return -SOS_EFAULT;
661 
662   pt[index_in_pt].dirty = is_dirty;
663   return SOS_OK;
664 }
665 
666 
667 sos_paddr_t sos_paging_get_paddr(sos_vaddr_t vaddr)
668 {
669   /* Get the page directory entry and table entry index for this
670      address */
671   unsigned index_in_pd = virt_to_pd_index(vaddr);
672   unsigned index_in_pt = virt_to_pt_index(vaddr);
673   unsigned offset_in_page = virt_to_page_offset(vaddr);
674   
675   /* Get the PD of the current context */
676   struct x86_pde *pd = (struct x86_pde*)
677     (SOS_PAGING_MIRROR_VADDR
678      + SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));
679 
680   /* Address of the PT in the mirroring */
681   struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
682                                            + SOS_PAGE_SIZE*index_in_pd);
683 
684   /* No page mapped at this address ? */
685   if (! pd[index_in_pd].present)
686     return (sos_paddr_t)NULL;
687   if (! pt[index_in_pt].present)
688     return (sos_paddr_t)NULL;
689 
690   return (pt[index_in_pt].paddr << 12) + offset_in_page;
691 }
692 
693 
694 /* *************************************************
695  * Functions restricted to mm_context module
696  */
697 
698 
699 sos_paddr_t sos_paging_get_current_PD_paddr(void)
700 {
701   struct x86_pdbr pdbr;
702   asm volatile("movl %%cr3, %0\n": "=r"(pdbr));
703   return (pdbr.pd_paddr << 12);
704 }
705 
706 
707 sos_ret_t sos_paging_set_current_PD_paddr(sos_paddr_t paddr_PD)
708 {
709   struct x86_pdbr pdbr;
710 
711   SOS_ASSERT_FATAL(paddr_PD != 0);
712   SOS_ASSERT_FATAL(SOS_IS_PAGE_ALIGNED(paddr_PD));
713 
714   /* Setup the value of the PDBR */
715   memset(& pdbr, 0x0, sizeof(struct x86_pdbr)); /* Reset the PDBR */
716   pdbr.pd_paddr = (paddr_PD >> 12);
717 
718   /* Configure the MMU according to the PDBR */
719   asm volatile ("movl %0,%%cr3\n" ::"r"(pdbr));
720 
721   return SOS_OK;
722 }
723 
724 
725 sos_ret_t sos_paging_dispose(sos_vaddr_t vaddr_PD)
726 {
727   x86_pde_val_t *pd = (x86_pde_val_t*) vaddr_PD;
728   x86_pte_val_t *pt;
729   int           index_in_pd;
730 
731   /* Allocate 1 page in kernel space to map the PTs in order to
732      unreference the physical pages they reference */
733   pt = (x86_pte_val_t *)sos_kmem_vmm_alloc(1, 0);
734   if (! pt)
735     return -SOS_ENOMEM;
736 
737   /* (Nothing to do in kernel space) */
738 
739   /* Reset all the PTs in user space */
740   for (index_in_pd = (SOS_PAGING_BASE_USER_ADDRESS >> 22) ;
741        index_in_pd < 1024 ; /* 1 PDE = 1 PT
742                                = 1024 Pages
743                                = 4MB */
744        index_in_pd ++)
745     {
746       sos_paddr_t paddr_pt = (pd[index_in_pd].pde.pt_paddr << 12);
747       int index_in_pt;
748 
749       /* Nothing to do if there is no PT */
750       if (! pd[index_in_pd].pde.present)
751         {
752           pd[index_in_pd].ui32 = 0;
753           continue;
754         }
755 
756       /* Map this PT inside kernel */
757       SOS_ASSERT_FATAL(SOS_OK
758                        == sos_paging_map(paddr_pt,
759                                          (sos_vaddr_t)pt, FALSE,
760                                          SOS_VM_MAP_PROT_READ
761                                          | SOS_VM_MAP_PROT_WRITE));
762       
763       /* Reset all the mappings in this PT */
764       for (index_in_pt = 0 ; index_in_pt < 1024 ; index_in_pt ++)
765         {
766           /* Ignore unmapped PTE */
767           if (! pt[index_in_pt].pte.present)
768             {
769               pt[index_in_pt].ui32 = 0;
770               continue;
771             }
772 
773           /* Unreference the associated page */
774           sos_physmem_unref_physpage(pt[index_in_pt].pte.paddr << 12);
775 
776           /* Decrease occupation count of the PT */
777           sos_physmem_dec_physpage_occupation(paddr_pt);
778 
779           /* Reset PTE */
780           pt[index_in_pt].ui32 = 0;
781         }
782 
783       /* Unmap PT */
784       SOS_ASSERT_FATAL(SOS_OK == sos_paging_unmap((sos_vaddr_t)pt));
785 
786       /* Reset PDE */
787       pd[index_in_pd].ui32 = 0;
788 
789       /* Unreference PT */
790       sos_physmem_unref_physpage(paddr_pt);
791     }
792 
793   /* Release the kernel space used for the temporary PT */
794   SOS_ASSERT_FATAL(SOS_OK == sos_kmem_vmm_free((sos_vaddr_t)pt));
795 
796   return SOS_OK;
797 }
798 
799 
800 sos_ret_t sos_paging_copy_kernel_space(sos_vaddr_t dest_vaddr_PD,
801                                        sos_vaddr_t src_vaddr_PD)
802 {
803   x86_pde_val_t *src_pd       = (x86_pde_val_t*) src_vaddr_PD;
804   x86_pde_val_t *dest_pd      = (x86_pde_val_t*) dest_vaddr_PD;
805   sos_paddr_t   dest_paddr_PD = sos_paging_get_paddr(dest_vaddr_PD);
806   x86_pde_val_t mirror_pde;
807   int           index_in_pd;
808 
809   /* Fill destination PD with zeros */
810   memset((void*)dest_vaddr_PD, 0x0, SOS_PAGE_SIZE);
811 
812   /* Synchronize it with the master Kernel MMU context. Stop just
813      before the mirroring ! */
814   for (index_in_pd = 0 ;
815        index_in_pd < (SOS_PAGING_MIRROR_VADDR >> 22) ; /* 1 PDE = 1 PT
816                                                           = 1024 Pages
817                                                           = 4MB */
818        index_in_pd ++)
819     {
820       /* Copy the master's configuration */
821       dest_pd[index_in_pd].ui32 = src_pd[index_in_pd].ui32;
822 
823       /* We DON'T mark the underlying PT and pages as referenced
824          because all the PD are equivalent in the kernel space: as
825          soon as a page is mapped in the kernel, it is mapped by X
826          address spaces, and as soon as it is unmapped by 1 address
827          space, it is unmapped in all the others. So that for X
828          address spaces, the reference counter will be either 0 or X,
829          and not something else: using the reference counter correctly
830          won't be of any use and would consume some time in updating it. */
831     }
832 
833   /* Setup the mirroring for the new address space */
834   mirror_pde.ui32 = 0;
835   mirror_pde.pde.present  = TRUE;
836   mirror_pde.pde.write    = 1;
837   mirror_pde.pde.user     = 0; /* This is a KERNEL PDE */
838   mirror_pde.pde.pt_paddr = (dest_paddr_PD >> 12);
839   dest_pd[SOS_PAGING_MIRROR_VADDR >> 22].ui32 = mirror_pde.ui32;
840 
841   return SOS_OK;
842 }
843 
844 
845 sos_ret_t sos_paging_copy_user_space(sos_vaddr_t dest_vaddr_PD,
846                                      sos_vaddr_t src_vaddr_PD)
847 {
848   x86_pde_val_t *src_pd  = (x86_pde_val_t*) src_vaddr_PD;
849   x86_pde_val_t *dest_pd = (x86_pde_val_t*) dest_vaddr_PD;
850   x86_pte_val_t *tmp_src_pt, *tmp_dest_pt;
851   int           index_in_pd;
852 
853   /* Allocate 2 pages in kernel space in which to map the PTs, in
854      order to copy them from the source to the destination PD */
855   tmp_src_pt  = (x86_pte_val_t *)sos_kmem_vmm_alloc(1, 0);
856   if (! tmp_src_pt)
857     return -SOS_ENOMEM;
858 
859   tmp_dest_pt = (x86_pte_val_t *)sos_kmem_vmm_alloc(1, 0);
860   if (! tmp_dest_pt)
861     {
862       sos_kmem_vmm_free((sos_vaddr_t)tmp_src_pt); /* release the 1st page */
863       return -SOS_ENOMEM;
864     }
865 
866   /* Copy each used PT from source to destination */
867   for (index_in_pd = (SOS_PAGING_BASE_USER_ADDRESS >> 22) ;
868        index_in_pd < 1024 ; /* 1 PDE = 1 PT
869                                = 1024 Pages
870                                = 4MB */
871        index_in_pd ++)
872     {
873       sos_paddr_t paddr_dest_pt;
874       int         index_in_pt;
875 
876       /* We first literally copy the source PDE into the destination
877          PDE. However, please bear in mind that, in the end, both
878          won't reference the same physical PT: the destination PDE
879          will be updated (below) to match the address of its own new
880          PT */
881       dest_pd[index_in_pd].ui32 = src_pd[index_in_pd].ui32;
882 
883       /* Ignore unused PTs */
884       if (! src_pd[index_in_pd].pde.present)
885         continue;
886 
887       /* Allocate the destination PT */
888       paddr_dest_pt = sos_physmem_ref_physpage_new(TRUE);
889       if (NULL == (void*)paddr_dest_pt)
890         {
891           sos_paging_dispose((sos_vaddr_t)dest_vaddr_PD);
892           
893           /* Release the temporary kernel space used for the copy */
894           sos_kmem_vmm_free((sos_vaddr_t)tmp_src_pt);
895           sos_kmem_vmm_free((sos_vaddr_t)tmp_dest_pt);
896           return -SOS_ENOMEM;
897         }
898 
899       /* Map source and destination PT */
900       SOS_ASSERT_FATAL(SOS_OK
901                        == sos_paging_map(src_pd[index_in_pd].pde.pt_paddr << 12,
902                                          (sos_vaddr_t)tmp_src_pt, FALSE,
903                                          SOS_VM_MAP_PROT_READ));
904       SOS_ASSERT_FATAL(SOS_OK
905                        == sos_paging_map(paddr_dest_pt,
906                                          (sos_vaddr_t)tmp_dest_pt, FALSE,
907                                          SOS_VM_MAP_PROT_READ
908                                          | SOS_VM_MAP_PROT_WRITE));
909 
910       /* Copy the contents of the source to the destination PT,
911          updating the reference counts of the pages */
912       for (index_in_pt = 0 ; index_in_pt < 1024 ; index_in_pt ++)
913         {
914           /* Copy the source PTE */
915           tmp_dest_pt[index_in_pt].ui32 = tmp_src_pt[index_in_pt].ui32;
916           
917           /* Ignore non-present pages */
918           if (! tmp_dest_pt[index_in_pt].pte.present)
919             continue;
920 
921           /* Reset the dirty/accessed flags */
922           tmp_dest_pt[index_in_pt].pte.accessed = 0;
923           tmp_dest_pt[index_in_pt].pte.dirty    = 0;
924 
925           /* The physical page is now also referenced by the destination */
926           sos_physmem_ref_physpage_at(tmp_src_pt[index_in_pt].pte.paddr << 12);
927 
928           /* Increase occupation count of the PT */
929           sos_physmem_inc_physpage_occupation(paddr_dest_pt);
930         }
931 
932       /* Unmap the temporary PTs */
933       SOS_ASSERT_FATAL(SOS_OK == sos_paging_unmap((sos_vaddr_t)tmp_src_pt));
934       SOS_ASSERT_FATAL(SOS_OK == sos_paging_unmap((sos_vaddr_t)tmp_dest_pt));
935 
936       /* Update the destination PDE */
937       dest_pd[index_in_pd].pde.pt_paddr = (paddr_dest_pt >> 12);
938 
939       /* Reset the dirty/accessed flags */
940       dest_pd[index_in_pd].pde.accessed = 0;
941     }
942 
943 
944   /* Release the temporary kernel space used for the copy */
945   SOS_ASSERT_FATAL(SOS_OK == sos_kmem_vmm_free((sos_vaddr_t)tmp_src_pt));
946   SOS_ASSERT_FATAL(SOS_OK == sos_kmem_vmm_free((sos_vaddr_t)tmp_dest_pt));
947 
948   return SOS_OK;
949 }
950 
951 
952 sos_ret_t sos_paging_prepare_COW(sos_uaddr_t base_address,
953                                  sos_size_t length)
954 {
955   SOS_ASSERT_FATAL(SOS_IS_PAGE_ALIGNED(base_address));
956   SOS_ASSERT_FATAL(SOS_IS_PAGE_ALIGNED(length));
957   SOS_ASSERT_FATAL(SOS_PAGING_BASE_USER_ADDRESS <= base_address);
958 
959   /* Mark as read-only all the pages that are already mapped in
960      physical memory */
961   for ( ;
962        length > 0 ;
963        length -= SOS_PAGE_SIZE, base_address += SOS_PAGE_SIZE)
964     {
965       sos_paging_set_prot(base_address,
966                           SOS_VM_MAP_PROT_READ);
967     }
968 
969   return SOS_OK;
970 }
971 
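/*
 * The function above and the one below together implement the
 * copy-on-write protocol used by this file: sos_paging_prepare_COW()
 * downgrades an interval of user pages to read-only while their
 * physical pages remain shared (e.g. right after
 * sos_paging_copy_user_space()). A later write to such a page raises a
 * page fault, and the fault handler is then expected to call
 * sos_paging_try_resolve_COW(), which either simply restores write
 * access when the faulting address space is the last one referencing
 * the page, or allocates a private copy and remaps it read/write in
 * place of the shared page.
 */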
972 
973 sos_ret_t sos_paging_try_resolve_COW(sos_uaddr_t uaddr)
974 {
975   sos_ret_t refcnt;
976 
977   /* Get the page directory entry and table entry index for this
978      address */
979   unsigned index_in_pd = virt_to_pd_index(uaddr);
980   unsigned index_in_pt = virt_to_pt_index(uaddr);
981   
982   /* Get the PD of the current context */
983   struct x86_pde *pd = (struct x86_pde*)
984     (SOS_PAGING_MIRROR_VADDR
985      + SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));
986 
987   /* Address of the PT in the mirroring */
988   struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
989                                            + SOS_PAGE_SIZE*index_in_pd);
990 
991   /* No page mapped at this address ? */
992   if (! pd[index_in_pd].present)
993     return -SOS_EFAULT;
994   if (! pt[index_in_pt].present)
995     return -SOS_EFAULT;
996   
997   /* Read-only PT not supported by the kernel! */
998   if (! pd[index_in_pd].write)
999     return -SOS_EFAULT;
1000 
1001   /* A COW request does not make sense if the page is already
1002      mapped read/write */
1003   SOS_ASSERT_FATAL(! pt[index_in_pt].write);
1004 
1005   /* We make a private copy of the page only if the currently mapped
1006      page is shared by more than one address space */
1007   refcnt = sos_physmem_get_physpage_refcount(pt[index_in_pt].paddr << 12);
1008   SOS_ASSERT_FATAL(refcnt > 0);
1009 
1010   if (refcnt == 1)
1011     {
1012       /* We are the only address space to reference this page, we can
1013          safely turn it read/write now */
1014       pt[index_in_pt].write = 1;
1015       invlpg(SOS_PAGE_ALIGN_INF(uaddr)); /* invalidate the stale user TLB entry */
1016     }
1017 
1018   /* Otherwise we need to make a private copy of the page */
1019   else
1020     {
1021       sos_paddr_t new_ppage;
1022       sos_vaddr_t vpage_src, tmp_dest;
1023 
1024       /* For that, we allocate the destination page inside the kernel
1025          space to perform the copy. We will transfer it into its
1026          final user-space address later */
1027       tmp_dest = sos_kmem_vmm_alloc(1, SOS_KMEM_VMM_MAP);
1028       if (! tmp_dest)
1029         return -SOS_ENOMEM;
1030 
1031       /* copy the contents of the page */
1032       vpage_src = SOS_PAGE_ALIGN_INF(uaddr);
1033       memcpy((void*)tmp_dest, (void*)vpage_src, SOS_PAGE_SIZE);
1034 
1035       /* replace the original (read-only) mapping with a (read/write)
1036          mapping to the new page. This will automatically unreference
1037          the original page */
1038       new_ppage = sos_paging_get_paddr(tmp_dest);
1039       SOS_ASSERT_FATAL(new_ppage != (sos_paddr_t)NULL);
1040       if (SOS_OK != sos_paging_map(new_ppage, vpage_src,
1041                                    TRUE,
1042                                    SOS_VM_MAP_PROT_READ
1043                                    | SOS_VM_MAP_PROT_WRITE))
1044         {
1045           sos_kmem_vmm_free(tmp_dest);
1046           return -SOS_ENOMEM;
1047         }
1048 
1049       /* We can now unmap the destination page from inside the
1050          kernel and free the kernel VM range for it */
1051       SOS_ASSERT_FATAL(SOS_OK == sos_kmem_vmm_free(tmp_dest));
1052     }
1053 
1054   /* That's all, folks ! */
1055   return SOS_OK;
1056 }
