SimpleOS
The LXR Cross Referencer for SOS

/* Copyright (C) 2005,2006      David Decotigny

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; either version 2
   of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
   USA.
*/
#include <sos/ksynch.h>
#include <sos/kmem_slab.h>
#include <sos/hash.h>
#include <sos/physmem.h> /* For SOS_PAGE_MASK */
#include <sos/list.h>
#include <sos/assert.h>
#include <sos/uaccess.h>
#include <sos/kmalloc.h>

#include "fs_pagecache.h"


#define SOS_OFFSET64_PAGE_ALIGN_INF(offs64) \
  ( ((sos_luoffset_t)(offs64)) & (~ ((sos_luoffset_t)(SOS_PAGE_MASK))) )


#define SOS_OFFSET64_IS_PAGE_ALIGNED(offs64) \
  ( ( ((sos_luoffset_t)(offs64)) & (((sos_luoffset_t)(SOS_PAGE_MASK))) ) == 0 )

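/*
 * Worked example for the two macros above (illustrative; assumes the
 * usual 4 kB pages, i.e. SOS_PAGE_MASK == 0xfff):
 *
 *   SOS_OFFSET64_PAGE_ALIGN_INF(0x12345ULL)  == 0x12000   (round down)
 *   SOS_OFFSET64_IS_PAGE_ALIGNED(0x12000ULL) != 0         (aligned)
 *   SOS_OFFSET64_IS_PAGE_ALIGNED(0x12345ULL) == 0         (not aligned)
 */
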
/**
 * Definition of an object holding a reference to a shared mapping of
 * a file/device-mapped cache page.
 *
 * @note This structure is large. It could be shrunk considerably by
 * removing the "name" field from the lock structure (32 bytes).
 */
struct sos_fs_pagecache_entry
{
  /** Offset of the cached page in the file or device */
  sos_luoffset_t file_offset;

  /** Address of the cached page for this offset */
  sos_vaddr_t    kernel_vaddr;

  struct sos_kmutex lock;
  sos_count_t ref_cnt;

  sos_bool_t initial_fill_aborted; /**< True when the page could not
                                      be correctly filled */

  /**
   * When 0: the page is clean with respect to read/write syscalls,
   * i.e. the disk contents reflect the contents of the page since the
   * last read/write operation. However, the disk may NOT be in sync
   * with respect to mmap() operations: if mmap() operations occurred
   * in the meantime, the disk may NOT be up to date, and the
   * pagecache entry may not even be considered dirty. This is because
   * we do not trace each of the read/write MMU operations from every
   * process (this would require catching every write, even on
   * read/write-mapped pages) and we don't have a reverse mapping
   * available to set the page read-only in every mapping once it has
   * been synced to disk (to effectively trace the dirty state
   * relative to mmap operations).
   *
   * When > 0: at least one process changed the contents of the page
   * through read/write syscalls since the last sync operation.
   *
   * @note A boolean is enough for 99% of the code. But we need a real
   * counter for the sos_fs_pagecache_sync operation to make sure we
   * don't iterate twice over the same page.
   */
  sos_lcount_t rw_dirty_order;
#define ENTRY_IS_RW_DIRTY(e) ((e)->rw_dirty_order > 0)

  /** Linkage structure to keep the cache entry in the hash map */
  struct sos_hash_linkage hlink;

  /** Links to insert the entry into the rw_sync/rw_dirty lists */
  struct sos_fs_pagecache_entry *prev, *next;
};


struct sos_fs_pagecache
{
  /** The operation used to synchronize the mapped pages with the
      backing store */
  sos_fs_pagecache_sync_function_t sync_fct;
  void * sync_fct_custom_data;

  /** The dictionary offset -> pagecache_entry */
  struct sos_hash_table  * lookup_table;

  /* Lists to look into in order to free a node */
  struct sos_fs_pagecache_entry * rw_sync_list;  /**< Pages in sync
                                                      with disk wrt
                                                      read/write API
                                                      (LRU at end) */
  struct sos_fs_pagecache_entry * rw_dirty_list; /**< Dirty pages wrt
                                                      read/write API
                                                      (LRU last) */

  /** The "timestamp" high watermark used to iterate over the dirty
      pages in the sync function */
  sos_lcount_t top_rw_dirty_order;
};

/** The slab cache for pagecache */
static struct sos_kslab_cache * cache_of_pagecache;


/** The slab cache for pagecache entries */
static struct sos_kslab_cache * cache_of_pagecache_entry;


sos_ret_t sos_fs_pagecache_subsystem_setup()
{
  /* Allocate the slab caches for the page caches and page cache
     entries */

  cache_of_pagecache =
    sos_kmem_cache_create("pagecache", sizeof(struct sos_fs_pagecache),
                          2, 0, SOS_KSLAB_CREATE_MAP | SOS_KSLAB_CREATE_ZERO);
  if (NULL == cache_of_pagecache)
    return -SOS_ENOMEM;

  cache_of_pagecache_entry =
    sos_kmem_cache_create("pagecache_entry",
                          sizeof(struct sos_fs_pagecache_entry),
                          2, 0, SOS_KSLAB_CREATE_MAP | SOS_KSLAB_CREATE_ZERO);
  if (NULL == cache_of_pagecache_entry)
    {
      sos_kmem_cache_destroy(cache_of_pagecache);
      return -SOS_ENOMEM;
    }

  return SOS_OK;
}

struct sos_fs_pagecache *
sos_fs_pagecache_new_cache(sos_fs_pagecache_sync_function_t sync_fct,
                           void * sync_fct_custom_data)
{
  struct sos_fs_pagecache * pagecache
    = (struct sos_fs_pagecache*) sos_kmem_cache_alloc(cache_of_pagecache,
                                                      0);
  if (NULL == pagecache)
    return NULL;

  pagecache->lookup_table = sos_hash_create("pagecache",
                                            struct sos_fs_pagecache_entry,
                                            sos_hash_ui64,
                                            sos_hash_key_eq_ui64,
                                            127, file_offset, hlink);
  if (NULL == pagecache->lookup_table)
    {
      sos_kmem_cache_free((sos_vaddr_t) pagecache);
      return NULL;
    }

  pagecache->sync_fct             = sync_fct;
  pagecache->sync_fct_custom_data = sync_fct_custom_data;
  pagecache->top_rw_dirty_order   = 0x24;

  return pagecache;
}

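/*
 * Illustrative sketch (not part of the original file): how a block
 * device driver might instantiate a page cache.  The "struct mydev",
 * mydev_write_page() and mydev_push_to_disk() names are hypothetical;
 * the callback signature is inferred from the way sync_fct is invoked
 * in pagecache_sync_page() below.
 */
#if 0
struct mydev
{
  struct sos_fs_pagecache * pagecache;
  /* ... device-specific state ... */
};

/* Write one cached page back to the backing store */
static sos_ret_t mydev_write_page(sos_luoffset_t file_offset,
                                  sos_vaddr_t    kernel_vaddr,
                                  void *         custom_data)
{
  struct mydev * dev = (struct mydev*) custom_data;
  return mydev_push_to_disk(dev, file_offset, kernel_vaddr, SOS_PAGE_SIZE);
}

static sos_ret_t mydev_init_pagecache(struct mydev * dev)
{
  dev->pagecache = sos_fs_pagecache_new_cache(mydev_write_page, dev);
  if (NULL == dev->pagecache)
    return -SOS_ENOMEM;
  return SOS_OK;
}
#endif
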

sos_ret_t
sos_fs_pagecache_delete_cache(struct sos_fs_pagecache * pc)
{
  /* The cache is EXPECTED to be empty! */

  if (!list_is_empty(pc->rw_dirty_list))
    SOS_FATAL_ERROR("Non-empty dirty list");
  if (!list_is_empty(pc->rw_sync_list))
    SOS_FATAL_ERROR("Non-empty sync list");

  sos_hash_dispose(pc->lookup_table);
  return sos_kmem_cache_free((sos_vaddr_t)pc);
}


/** Helper function to flush a page to disk. Expects the entry to be
    locked */
static sos_ret_t pagecache_sync_page(struct sos_fs_pagecache * pc,
                                     struct sos_fs_pagecache_entry * entry)
{
  sos_ret_t retval;

  if (! ENTRY_IS_RW_DIRTY(entry))
    return SOS_OK;

  /* Now do the real transfer to the backing store */
  retval = pc->sync_fct(entry->file_offset, entry->kernel_vaddr,
                        pc->sync_fct_custom_data);
  if (SOS_OK != retval)
    return retval;

  /* Transfer the page to the sync list */
  list_delete(pc->rw_dirty_list, entry);
  entry->rw_dirty_order = 0;
  list_add_head(pc->rw_sync_list, entry);

  return SOS_OK;
}


/** Helper function to correctly lock an entry */
static sos_ret_t pagecache_use(struct sos_fs_pagecache * pc,
                               struct sos_fs_pagecache_entry * entry)
{
  entry->ref_cnt ++;
  return sos_kmutex_lock(& entry->lock, NULL);
}


/**
 * Helper function to transfer a page to the dirty r/w list
 */
static sos_ret_t pagecache_set_rw_dirty(struct sos_fs_pagecache * pc,
                                        struct sos_fs_pagecache_entry * entry)
{
  if (ENTRY_IS_RW_DIRTY(entry))
    return SOS_OK; /* Nothing to do */

  list_delete(pc->rw_sync_list, entry);
  entry->rw_dirty_order = ++ pc->top_rw_dirty_order;
  list_add_head(pc->rw_dirty_list, entry);

  return SOS_OK;
}


/** Helper function to correctly unlock an entry, flushing it to disk
    if needed */
static sos_ret_t pagecache_release(struct sos_fs_pagecache * pc,
                                   struct sos_fs_pagecache_entry * entry)
{
  if (entry->ref_cnt > 1)
    {
      entry->ref_cnt --;
      sos_kmutex_unlock(& entry->lock);
      return SOS_OK;
    }

  /*
   * The cached page is now referenced ONLY by US, we can try to
   * remove it from the cache
   */

  /* Flush any change to disk, at least if we are sure that its
     content is legal, i.e. that the page_in callback succeeded in
     filling it */
  if (! entry->initial_fill_aborted)
    pagecache_sync_page(pc, entry);

  /* Ok, now WE are not interested in this entry anymore */
  entry->ref_cnt --;

  /* While we were blocked, another thread could have asked for the
     entry. In that case, stop here */
  if (entry->ref_cnt > 0)
    {
      sos_kmutex_unlock(& entry->lock);
      return SOS_OK;
    }

  /* Remove it from the lists */
  sos_hash_remove(pc->lookup_table, entry);
  if (ENTRY_IS_RW_DIRTY(entry))
    list_delete(pc->rw_dirty_list, entry);
  else
    list_delete(pc->rw_sync_list, entry);

  /* We can safely erase it now! */
  sos_kmutex_unlock(& entry->lock);
  SOS_ASSERT_FATAL(SOS_OK == sos_kmutex_dispose(& entry->lock)); /* No threads are waiting */
  sos_kfree(entry->kernel_vaddr);
  sos_kmem_cache_free((sos_vaddr_t)entry);

  return SOS_OK;
}


/**
 * Helper function to look up an entry from the cache and lock it. If
 * the entry does not exist (yet), return NULL.
 */
static struct sos_fs_pagecache_entry *
pagecache_lookup_and_lock(struct sos_fs_pagecache * pc,
                          sos_luoffset_t offset)
{
  sos_luoffset_t pgoffs = SOS_OFFSET64_PAGE_ALIGN_INF(offset);
  struct sos_fs_pagecache_entry * entry = NULL;

  while (TRUE)
    {
      entry
        = (struct sos_fs_pagecache_entry*) sos_hash_lookup(pc->lookup_table,
                                                           & pgoffs);
      if (! entry)
        break;

      /* Lock it now */
      SOS_ASSERT_FATAL(SOS_OK == pagecache_use(pc, entry));

      /*
       * Entry is now locked
       */

      /* Make sure it contains legal contents: if we were blocked
         because of the page_in operation reading it from disk, an
         error could have occurred. In this case, we must consider
         that this entry is not yet inserted in the cache */
      if (entry->initial_fill_aborted)
        {
          pagecache_release(pc, entry);
          continue;
        }

      /* Ok, we have the entry and it is correctly initialized! */
      break;
    }

  return entry;
}


sos_ret_t
sos_fs_pagecache_read(struct sos_fs_pagecache * pc,
                      sos_luoffset_t offset,
                      sos_genaddr_t dest_buf,
                      sos_size_t * /* in/out */len)
{
  sos_ret_t retval;
  sos_luoffset_t pgoffs = SOS_OFFSET64_PAGE_ALIGN_INF(offset);
  sos_luoffset_t endpos = offset + *len;
  struct sos_fs_pagecache_entry * entry;

  entry = pagecache_lookup_and_lock(pc, pgoffs);
  if (NULL == entry)
    return -SOS_ENOENT;

  /* Great ! Found the entry in the cache ! */

  /* Read only up to the end of the page */
  if (endpos - pgoffs > SOS_PAGE_SIZE)
    endpos = pgoffs + SOS_PAGE_SIZE;

  /* Copy page contents to destination buffer */
  retval = sos_memcpy_generic_to(dest_buf,
                                 entry->kernel_vaddr + (offset - pgoffs),
                                 endpos - offset);
  pagecache_release(pc, entry);

  if (retval < 0)
    {
      *len = 0;
      return retval;
    }

  *len = retval;
  if ((sos_luoffset_t)retval != endpos - offset)
    return -SOS_EFAULT;

  return SOS_OK;
}

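/*
 * Illustrative sketch (not part of the original file): a single call
 * to sos_fs_pagecache_read() never crosses a page boundary and
 * updates *len to the number of bytes actually copied, so callers
 * must check for short reads.  The myfs_read_example() name is
 * hypothetical.
 */
#if 0
static void myfs_read_example(struct sos_fs_pagecache * pc,
                              sos_luoffset_t file_pos,
                              sos_genaddr_t  dest_buf)
{
  sos_size_t len = 100;
  sos_ret_t  ret = sos_fs_pagecache_read(pc, file_pos, dest_buf, & len);

  if (-SOS_ENOENT == ret)
    {
      /* The page is not in the cache yet: it first has to be brought
         in through sos_fs_pagecache_ref_page() (see the sketch after
         sos_fs_pagecache_unref_page() below), then the read can be
         retried. */
    }
  else if (SOS_OK == ret && len < 100)
    {
      /* Short read: the requested range crossed a page boundary.
         The caller is expected to call sos_fs_pagecache_read() again
         at file_pos + len for the remaining bytes. */
    }
}
#endif
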

sos_ret_t
sos_fs_pagecache_write(struct sos_fs_pagecache * pc,
                       sos_luoffset_t offset,
                       sos_genaddr_t src_buf,
                       sos_size_t * /* in/out */len,
                       sos_bool_t synchronous_write)
{
  sos_ret_t retval;
  sos_luoffset_t pgoffs = SOS_OFFSET64_PAGE_ALIGN_INF(offset);
  sos_luoffset_t endpos = offset + *len;
  struct sos_fs_pagecache_entry * entry;

  entry = pagecache_lookup_and_lock(pc, pgoffs);
  if (NULL == entry)
    return -SOS_ENOENT;

  /* Great ! Found the entry in the cache ! */

  /* Write only up to the end of the page */
  if (endpos - pgoffs > SOS_PAGE_SIZE)
    endpos = pgoffs + SOS_PAGE_SIZE;

  /* Copy the source buffer contents into the page */
  retval = sos_memcpy_generic_from(entry->kernel_vaddr + (offset - pgoffs),
                                   src_buf,
                                   endpos - offset);
  /* Transfer the entry to the dirty list if needed */
  if (retval >= 0)
    pagecache_set_rw_dirty(pc, entry);

  if (retval < 0)
    {
      *len = 0;
      pagecache_release(pc, entry);
      return retval;
    }

  *len = retval;
  if ((sos_luoffset_t)retval != endpos - offset)
    retval = -SOS_EFAULT;
  else
    retval = SOS_OK;

  /* Flush to disk if needed */
  if (synchronous_write)
    {
      sos_ret_t ret = pagecache_sync_page(pc, entry);
      if (SOS_OK == retval)
        retval = ret;
    }

  pagecache_release(pc, entry);
  return retval;
}


sos_ret_t sos_fs_pagecache_set_dirty(struct sos_fs_pagecache * pc,
                                     sos_luoffset_t offset,
                                     sos_bool_t sync_backing_store)
{
  sos_luoffset_t pgoffs = SOS_OFFSET64_PAGE_ALIGN_INF(offset);
  struct sos_fs_pagecache_entry * entry;

  entry = pagecache_lookup_and_lock(pc, pgoffs);
  if (NULL == entry)
    return -SOS_ENOENT;

  /* Great ! Found the entry in the cache ! */
  pagecache_set_rw_dirty(pc, entry);

  /* Synchronize to backing store if needed */
  if (sync_backing_store)
    pagecache_sync_page(pc, entry);

  pagecache_release(pc, entry);
  return SOS_OK;
}


struct sos_fs_pagecache_entry *
sos_fs_pagecache_ref_page(struct sos_fs_pagecache * pc,
                          sos_luoffset_t offset,
                          sos_vaddr_t * /* out */ kernel_vaddr,
                          sos_bool_t * /* out */ newly_allocated)
{
  sos_luoffset_t pgoffs = SOS_OFFSET64_PAGE_ALIGN_INF(offset);
  struct sos_fs_pagecache_entry * entry;

  /* The offset is expected to be page-aligned */
  if (pgoffs != offset)
    return NULL;

  entry = pagecache_lookup_and_lock(pc, pgoffs);
  if (NULL != entry)
    {
      /* Found it ! No need to go further */
      *newly_allocated = FALSE;
      *kernel_vaddr = entry->kernel_vaddr;
      return entry;
    }


  /*
   * Need to allocate a new kernel page
   */

  entry = (struct sos_fs_pagecache_entry*)
    sos_kmem_cache_alloc(cache_of_pagecache_entry, 0);
  if (NULL == entry)
    return NULL;

  if (SOS_OK != sos_kmutex_init(& entry->lock, "pagecache_entry",
                                SOS_KWQ_ORDER_FIFO))
    {
      sos_kmem_cache_free((sos_vaddr_t)entry);
      return NULL;
    }

  /* The initial state of the page corresponds to a failed
     initialization */
  entry->file_offset          = pgoffs;
  entry->initial_fill_aborted = TRUE;
  entry->ref_cnt              = 1;

  /* Allocate the page */
  entry->kernel_vaddr = sos_kmalloc(SOS_PAGE_SIZE, 0);
  if (((sos_vaddr_t)NULL) == entry->kernel_vaddr)
    {
      sos_kmutex_dispose(& entry->lock);
      sos_kmem_cache_free((sos_vaddr_t)entry);
      return NULL;
    }

  /* Own the mutex */
  SOS_ASSERT_FATAL(SOS_OK == sos_kmutex_lock(& entry->lock, NULL));

  /* Try to insert it into the hash table. Might fail if the page was
     already inserted, which is possible because the allocation
     routines might have blocked */
  if (SOS_OK != sos_hash_insert(pc->lookup_table, entry))
    {
      /* An entry was inserted during the allocations: undo the new entry */
      sos_kmutex_unlock(& entry->lock);
      sos_kmutex_dispose(& entry->lock);
      sos_kfree(entry->kernel_vaddr);
      sos_kmem_cache_free((sos_vaddr_t)entry);

      /* Get the real entry */
      entry = pagecache_lookup_and_lock(pc, offset);
      SOS_ASSERT_FATAL(NULL != entry);
      *kernel_vaddr = entry->kernel_vaddr;
      *newly_allocated = FALSE;
      return entry;
    }

  /* Now register the entry in the sync list */
  entry->rw_dirty_order = 0;
  list_add_head(pc->rw_sync_list, entry);

  *newly_allocated = TRUE;
  *kernel_vaddr = entry->kernel_vaddr;
  return entry;
}


sos_ret_t
sos_fs_pagecache_unlock_page(struct sos_fs_pagecache * pc,
                             struct sos_fs_pagecache_entry * entry,
                             sos_bool_t initial_fill_aborted)
{

  entry->initial_fill_aborted = initial_fill_aborted;

  if (initial_fill_aborted)
    return pagecache_release(pc, entry);

  return sos_kmutex_unlock(& entry->lock);
}


sos_ret_t
sos_fs_pagecache_unref_page(struct sos_fs_pagecache * pc,
                            sos_luoffset_t offset)
{
  sos_luoffset_t pgoffs = SOS_OFFSET64_PAGE_ALIGN_INF(offset);
  struct sos_fs_pagecache_entry * entry;

  /* The offset is expected to be page-aligned */
  if (pgoffs != offset)
    return -SOS_EINVAL;

  entry
    = (struct sos_fs_pagecache_entry*) sos_hash_lookup(pc->lookup_table,
                                                & pgoffs);
  SOS_ASSERT_FATAL(NULL != entry);
  SOS_ASSERT_FATAL(SOS_OK == sos_kmutex_lock(& entry->lock, NULL));
  return pagecache_release(pc, entry);
}

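/*
 * Illustrative sketch (not part of the original file): the page-in
 * protocol around sos_fs_pagecache_ref_page(), as suggested by the
 * functions above.  The entry is returned locked and referenced; a
 * newly allocated page must be filled by the caller before being
 * unlocked.  mydev_read_page_from_disk() and myfs_page_in() are
 * hypothetical helpers.
 */
#if 0
static sos_ret_t myfs_page_in(struct sos_fs_pagecache * pc,
                              sos_luoffset_t pgoffs /* page-aligned */,
                              sos_vaddr_t * /* out */ kvaddr)
{
  struct sos_fs_pagecache_entry * entry;
  sos_bool_t newly_allocated;

  entry = sos_fs_pagecache_ref_page(pc, pgoffs, kvaddr, & newly_allocated);
  if (NULL == entry)
    return -SOS_ENOMEM;

  if (newly_allocated)
    {
      /* We are responsible for filling the page from the backing store */
      sos_ret_t ret = mydev_read_page_from_disk(pgoffs, *kvaddr);

      /* On failure, unlock_page() with initial_fill_aborted == TRUE
         drops the half-initialized entry from the cache */
      sos_fs_pagecache_unlock_page(pc, entry, (SOS_OK != ret));
      return ret;
    }

  /* Page already cached and filled: simply release the lock, keeping
     the reference until sos_fs_pagecache_unref_page(pc, pgoffs) */
  return sos_fs_pagecache_unlock_page(pc, entry, FALSE);
}
#endif
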

sos_ret_t
sos_fs_pagecache_sync(struct sos_fs_pagecache * pc)
{
  sos_ret_t retval = SOS_OK;
  int dummy = 0;
  sos_lcount_t rw_dirty_order = 0;

  /** High watermark: pages added after this point will not be taken
      into account */
  sos_lcount_t top_rw_dirty_order = pc->top_rw_dirty_order;

  if (list_is_empty(pc->rw_dirty_list))
    return SOS_OK;

  /* This scan will be exhaustive and resilient to the addition/removal
     of entries as long as new entries are added with list_add_tail
     (because the scan is "forward", i.e. in head -> tail order) */
  while (TRUE)
    {
      struct sos_fs_pagecache_entry * entry = NULL;
      int ndirty;

      /* As long as we don't block, we can safely access the
         prev/next fields of the page descriptor */
      list_foreach_backward(pc->rw_dirty_list, entry, ndirty)
        {
          sos_ret_t ret = SOS_OK;
          struct sos_fs_pagecache_entry * prev_entry = NULL;

          /* Reached the initial high watermark? Don't take the
             additional pages into account */
          if (entry->rw_dirty_order > top_rw_dirty_order)
            break;

          if (entry->rw_dirty_order <= rw_dirty_order)
            continue;

          rw_dirty_order = entry->rw_dirty_order;
          prev_entry     = entry->prev;

          SOS_ASSERT_FATAL(SOS_OK == pagecache_use(pc, entry));
          if (! entry->initial_fill_aborted)
            ret = pagecache_sync_page(pc, entry);
          if (SOS_OK != ret)
            retval = ret;
          pagecache_release(pc, entry);

          /* We must NOT continue the inner loop, because the prev/next
             page cache entries might have been removed or added (sync
             pages, by definition)! */
          if (prev_entry != entry->prev)
            goto lookup_next_ent;
        }

      /* Reached the end of the list */
      break;

    lookup_next_ent:
      /* Start the scan over */
      dummy ++;
    }

  return retval;
}
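
/*
 * Illustrative sketch (not part of the original file): a typical
 * caller of sos_fs_pagecache_sync() would be a sync()-like operation
 * or an unmount path flushing every dirty page before the cache is
 * deleted.  The myfs_* names are hypothetical.
 */
#if 0
static sos_ret_t myfs_umount_flush(struct myfs_superblock * sb)
{
  /* Push every r/w-dirty page back to the backing store... */
  sos_ret_t ret = sos_fs_pagecache_sync(sb->pagecache);
  if (SOS_OK != ret)
    return ret;

  /* ...so that the cache (expected to be empty by then) can be
     deleted once every page has also been unreferenced */
  return sos_fs_pagecache_delete_cache(sb->pagecache);
}
#endif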
