|
[ source navigation ] [ diff markup ] [ identifier search ] [ general search ] |
|||
|
001 /* Copyright (C) 2005,2006 David Decotigny 002 003 This program is free software; you can redistribute it and/or 004 modify it under the terms of the GNU General Public License 005 as published by the Free Software Foundation; either version 2 006 of the License, or (at your option) any later version. 007 008 This program is distributed in the hope that it will be useful, 009 but WITHOUT ANY WARRANTY; without even the implied warranty of 010 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 011 GNU General Public License for more details. 012 013 You should have received a copy of the GNU General Public License 014 along with this program; if not, write to the Free Software 015 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 016 USA. 017 */ 018 #ifndef _SOS_FSPAGECACHE_H_ 019 #define _SOS_FSPAGECACHE_H_ 020 021 022 /** 023 * @file fs_pagecache.h 024 * 025 * Simple page cache interface. Used to automate the synchronization 026 * between the read/write operations and mmap. A "FS page cache" is 027 * simply a set of pages mapping a file in memory. A file may not be 028 * entirely mapped into memory: its pages are mapped only if any user 029 * thread invoked an mmap and page-faulted inside the mapped region to 030 * map these pages into memory. Contrary to some other caches in SOS 031 * (e.g. the block cache), this one is not limited in size. As many 032 * pages as needed will be allocated for it, as permitted by the 033 * available RAM. With a pageout mechanism, this cache will be 034 * shrunk when needed: some of its pages will be transferred back to 035 * disk and unmapped. 036 * 037 * A page cache is used both to cache memory mapped files of an FS, 038 * and memory mapped block devices. 
Hence: 039 * - there is one SINGLE page cache for each block device (proper to 040 * each disk, to each partition) 041 * - there is one SINGLE page cache for each file of a file system 042 * 043 * For block devices, the page cache automatically synchronizes the 044 * pages with the block cache as long as these mapped pages are 045 * accessed through the read/write API. However, <b>NO <i>automatic 046 * and accurate</i> synchronization</b> between the in-memory modified 047 * pages (accessed through the MMU) and the block cache is provided 048 * because we have no way to collect the accurate list of pages 049 * modified through MMU write accesses (this would require either 050 * catching all the MMU write operations [too inefficient !], or having 051 * a reverse-mapping system in order to look at the dirty bit of all 052 * the mappings). Hence, to enforce blkcache/pagecache 053 * synchronization, the msync/munmap API must be used manually. Thus, 054 * the page cache is accurately synchronized with the block cache: 055 * - automatically: with the read/write/sync operations 056 * - manually: with the msync and munmap (and of course: exit) operations 057 * 058 * Nevertheless, from the viewpoint of "blkdev.c", the 059 * blockdev_read/write operations are always in sync with the MMU 060 * because the pagecache is accessed prior to the blkcache: any 061 * divergence between the pagecache and the blkcache is hence 062 * <i>hidden</i>. But keep in mind that if you want the disk to 063 * accurately reflect the contents of the mapped pages, you have to 064 * eventually call msync, munmap, or destroy the address space (i.e. 065 * exit the process). 066 * 067 * A side effect: if you map /dev/hda and /dev/hda1, both mappings 068 * will be inconsistent and may also be inconsistent with read/write 069 * accesses. This is because the partitions have their own page cache 070 * while they share the block cache with the disk device. 
A solution 071 * would be to share the page cache between the disk device and all 072 * its partitions. But, due to the fact that partitions are not 073 * necessarily page-aligned in the disk, this would force some pages 074 * not to correspond to a page-aligned offset inside a partition, 075 * requiring either odd semantics for the mmap syscall (the 076 * allowed device "offset" would depend on the disk partitioning) if 077 * we want to share the mapped pages between the cache and userspace, 078 * or to allocate other pages for the required userspace mappings and 079 * keep them in sync with the page cache pages. Both solutions seem 080 * ugly to me, and not worth implementing since the page cache is 081 * aimed at being generic enough to be used for file mappings: files 082 * don't have sub-files (as do disk devices that have partitions). So 083 * solving the problem is not pertinent for files. And who will ever 084 * need /dev/hda mappings to be consistent with those of /dev/hda1 ?... 085 */ 086 #include <sos/errno.h> 087 #include <sos/uaccess.h> 088 089 090 /** Opaque structure holding a page cache */ 091 struct sos_fs_pagecache; 092 093 /** Opaque structure holding a page of the cache */ 094 struct sos_fs_pagecache_entry; 095 096 097 sos_ret_t sos_fs_pagecache_subsystem_setup(void); 098 099 100 /** 101 * Type of the callback invoked to flush a dirty page to backing store 102 */ 103 typedef sos_ret_t 104 (*sos_fs_pagecache_sync_function_t)(sos_luoffset_t offset, 105 sos_vaddr_t dirty_page, 106 void * custom_data); 107 108 109 /** 110 * Create a new pagecache. 111 * 112 * @param sync_fct the function used to flush the dirty pages to 113 * backing store. May be NULL * @param sync_fct_custom_data presumably forwarded to sync_fct as its custom_data argument (TODO confirm against the implementation) 114 */ 115 struct sos_fs_pagecache * 116 sos_fs_pagecache_new_cache(sos_fs_pagecache_sync_function_t sync_fct, 117 void * sync_fct_custom_data); 118 119 120 /** 121 * Delete the page cache. 
122 * 123 * The page cache is expected to be already flushed to backing store 124 */ 125 sos_ret_t 126 sos_fs_pagecache_delete_cache(struct sos_fs_pagecache * pc); 127 128 129 /** 130 * Read the data located at the given offset from the cache, if present. 131 * @return ENOENT when no page for the given offset is mapped, return 132 * EFAULT when the contents could not be completely copied to 133 * destination buffer 134 */ 135 sos_ret_t 136 sos_fs_pagecache_read(struct sos_fs_pagecache * pc, 137 sos_luoffset_t offset, 138 sos_genaddr_t dest_buf, 139 sos_size_t * /* in/out */len); 140 141 142 /** 143 * Write at the given offset into the cache, if present 144 * @return ENOENT when no page for the given offset is mapped, return 145 * EFAULT when the contents could not be completely copied from 146 * source buffer 147 */ 148 sos_ret_t 149 sos_fs_pagecache_write(struct sos_fs_pagecache * pc, 150 sos_luoffset_t offset, 151 sos_genaddr_t src_buf, 152 sos_size_t * /* in/out */len, 153 sos_bool_t synchronous_write); 154 155 156 /** 157 * Function reserved to blkdev.c and FS code: used by the msync 158 * callback to mark a pagecache page dirty 159 * 160 * @param sync_backing_store When TRUE, then the page must be flushed 161 * to backing store. 162 */ 163 sos_ret_t sos_fs_pagecache_set_dirty(struct sos_fs_pagecache * pc, 164 sos_luoffset_t offset, 165 sos_bool_t sync_backing_store); 166 167 168 /** 169 * Prepare a page to be mapped: get a NEW reference to the page 170 * (kernel address) of the page to be mapped, which is also locked in 171 * order to be used. 
If the page is not yet present in the cache, 172 * allocate it and prepare it to be filled 173 * 174 * @param offset MUST be page-aligned 175 * @param newly_allocated TRUE when the page was not already mapped by 176 * someone: the contents of the page are then IRRELEVANT 177 * 178 * @return NULL on error 179 * 180 * @note The page is also LOCKED, use sos_fs_pagecache_unlock_page to unlock it before 181 * unreferencing it 182 */ 183 struct sos_fs_pagecache_entry * 184 sos_fs_pagecache_ref_page(struct sos_fs_pagecache * pc, 185 sos_luoffset_t offset, 186 sos_vaddr_t * /* out */ kernel_vaddr, 187 sos_bool_t * /* out */ newly_allocated); 188 189 190 /** Called by the blkdev.c and FS page_in callback to unlock the entry 191 after it has been initialized. */ 192 sos_ret_t 193 sos_fs_pagecache_unlock_page(struct sos_fs_pagecache * pc, 194 struct sos_fs_pagecache_entry * entry, 195 sos_bool_t initial_fill_aborted); 196 197 198 /** 199 * Called when the page is unmapped from a user process space 200 * @param offset MUST be page-aligned 201 * 202 * @note the page is expected to be present in the cache 203 * @note the entry is expected NOT to be locked ! 204 */ 205 sos_ret_t 206 sos_fs_pagecache_unref_page(struct sos_fs_pagecache * pc, 207 sos_luoffset_t offset); 208 209 210 /** Call the sync function on each dirty page */ 211 sos_ret_t 212 sos_fs_pagecache_sync(struct sos_fs_pagecache * pc); 213 214 #endif
[ source navigation ] | [ diff markup ] | [ identifier search ] | [ general search ] |