基于哈希表实现页面置换算法
RhythmLian 人气:0首先,在看这篇文章前,你需要了解缓存是干嘛的?
缓存
众所周知,程序运行时,数据一般存在内存或磁盘里,而内存中的数据总是可以被更快速的获取。但是内存空间是有限的,大多数人PC的内存可能在4G~16G之间,这意味着你必须要舍弃一部分不频繁被访问的数据,把它们存在磁盘里;而把经常需要被访问的数据存在内存里,这就是缓存的基本思路。
但对于程序(和你)而言,无法预测哪些数据是被经常访问的,所以你只能根据访问数据的历史来推测和统计哪些数据是被经常访问的,并把它存在内存里,如果内存满了就找个最不被经常访问的数据替换掉。这个统计过程和改写内存的策略通常被称为“页面置换算法”,事实上,所有可实现的缓存策略(页面置换算法)都是基于历史访问信息来实现的。科学家经过几十年的努力,发明了许许多多的页面置换算法,比如:FIFO、LFU、LRU、ARC、MQ、GD、GDSF....,它们各有所长,没有孰优孰劣。
缺页数(率):为了评判页面置换算法的优劣,针对访问数据的次数n,和数据未被缓存命中的次数(缺页数)p,缺页率=p/n。显然,缺页率越小,页面置换算法越优秀。
正事
本文基于哈希表的内存管理结构,简单地实现线程安全的缓存算法(LFU, LRU, ARC, MQ, GD, GDSF):
首先看API:(cache.h)
1 #pragma once 2 3 #include <sys/time.h> 4 #ifdef __cplusplus 5 extern "C" { 6 #endif 7 8 typedef struct _cache *cache_t, _cache; 9 typedef struct _cache_ele cache_pair; 10 typedef struct _cache_ret { /// 读缓存返回结果 11 long cost; /// 代价 12 const char*cache; /// 数据 13 }cache_ret; 14 /** 15 * @API 16 * create, delete, search, read 17 */ 18 cache_t new_cache (unsigned capacity, cache_ret(*model)(cache_t, const char*)); /// 新建 19 void del_cache (cache_t cache); /// 删除 20 unsigned cache_page_fault (cache_t cache); /// 缺页数 21 cache_ret read_cache (cache_t cache, const char*filename); /// 读缓存 22 23 /** 24 * @Cache_Algorithm_Model 25 * cache_ret(*)(cache_t, const char*) 26 */ 27 cache_ret LRU (cache_t cache, const char*request); /// 页面替换算法模型 28 cache_ret LFU (cache_t cache, const char*request); 29 cache_ret ARC (cache_t cache, const char*request); 30 cache_ret MQ (cache_t cache, const char*request); 31 cache_ret GD (cache_t cache, const char*request); 32 cache_ret GDSF(cache_t cache, const char*request); 33 34 #ifdef __cplusplus 35 } 36 #endif
数据结构:
缓存:
1 struct _cache_ele { /// 数据单元 2 char *key, *file_cache; /// 键值、数据 3 long cost; /// 代价(长度) 4 struct timeval pre_t; /// 上次访问时间 5 unsigned int cnt; /// 访问次数 6 struct _cache_ele *nxt, *pre; 7 }; 8 9 struct _cache { 10 cache_pair table[HASHTABELSIZE], *first_table; /// 哈希表,first_table根据需要生成 11 cache_ret (*f)(cache_t, const char *); /// 页面置换策略 12 pthread_mutex_t mutex; /// 线程安全的锁 13 unsigned int capacity, _cur, first_cur, page_fault; /// 容量、当前量、ft当前量、缺页数 14 };/// *cache_t
缓存策略实现:
缓存策略实际上是一个选择问题,如果缓存没有满,那么显然可以直接把新请求的数据直接读到缓存中,如果满了,则按照策略选一个数据替换掉。
(cache.c完整代码)
1 #include "cache.h" 2 #include <zconf.h> 3 #include "stdlib.h" 4 #include <sys/mman.h> 5 #include "pthread.h" 6 #include "string.h" 7 #include <sys/time.h> 8 #include <fcntl.h> 9 10 #define RMALLOC(type,n) (type*)malloc(sizeof(type)*(n)) 11 #define MALLOC(p,type,n) type*p = RMALLOC(type, n) 12 #define MAX_BUFFER_LEN 1024ll * 1024 13 #ifndef HASHTABLESZIE 14 #define HASHTABELSIZE 10005 15 #endif 16 17 unsigned int string_hash(const char *str) { 18 unsigned int hash = 0; 19 int i; 20 for (i = 0; *str; i++) { 21 if ((i & 1) == 0)hash ^= ((hash << 7) ^ (*str++) ^ (hash >> 3)); 22 else hash ^= (~((hash << 11) ^ (*str++) ^ (hash >> 5))); 23 } 24 return (hash & 0x7FFFFFFF); 25 } 26 27 struct _cache_ele { 28 char *key, *file_cache; 29 long cost; 30 struct timeval pre_t; 31 unsigned int cnt; 32 struct _cache_ele *nxt, *pre; 33 }; 34 35 cache_pair*new_cache_pair(){ 36 MALLOC(res, cache_pair, 1); 37 res->key = res->file_cache = NULL; 38 res->cnt = res->cost = 0; 39 res->nxt = res->pre = NULL; 40 return res; 41 } 42 43 void del_cache_pair(cache_pair *del) { 44 free(del->key); 45 free(del->file_cache); 46 free(del); 47 } 48 49 struct _cache { 50 cache_pair table[HASHTABELSIZE], *first_table; /// hash table 51 cache_ret (*f)(cache_t, const char *); /// function pointer 52 pthread_mutex_t mutex; 53 unsigned int capacity, _cur, first_cur, page_fault; 54 };/// *cache_t 55 56 cache_t new_cache(unsigned capacity, cache_ret(*model)(cache_t, const char *)) { 57 if (model) { 58 MALLOC(ret, _cache, 1); 59 pthread_mutex_init(&ret->mutex, NULL); 60 ret->capacity = capacity; 61 ret->page_fault = ret->first_cur = ret->_cur = 0; 62 memset(ret->table, 0, sizeof(cache_pair) * HASHTABELSIZE); 63 if (model == ARC)ret->first_table = RMALLOC(cache_pair, HASHTABELSIZE); 64 else if(model == MQ)ret->first_table = RMALLOC(cache_pair, HASHTABELSIZE * 3); 65 else ret->first_table = NULL; 66 ret->f = model; 67 return ret; 68 } 69 return NULL; 70 } 71 72 cache_ret read_cache(cache_t cache, const char *filename) { 73 pthread_mutex_lock(&cache->mutex); 74 cache_ret res = cache->f(cache, filename); 75 pthread_mutex_unlock(&cache->mutex); 76 return res; 77 } 78 79 unsigned cache_page_fault(cache_t cache){ 80 return cache->page_fault; 81 } 82 83 void del_cache(cache_t cache) { 84 pthread_mutex_destroy(&cache->mutex); 85 for (int i = 0; i < HASHTABELSIZE; ++i) { 86 cache_pair *p = cache->table[i].nxt; 87 while (p) { 88 cache_pair *tmp = p; 89 p = p->nxt; 90 del_cache_pair(tmp); 91 } 92 } 93 if (cache->first_table) { 94 for (int i = 0; i < HASHTABELSIZE; ++i) { 95 cache_pair *p = cache->first_table[i].nxt; 96 while (p) { 97 cache_pair *tmp = p; 98 p = p->nxt; 99 del_cache_pair(tmp); 100 } 101 } 102 free(cache->first_table); 103 } 104 free(cache); 105 } 106 107 cache_pair *is_in_table(cache_pair *table, const char *request, int *ret) { 108 unsigned int index = string_hash(request) % HASHTABELSIZE; 109 cache_pair *src = table + index; 110 if (!src->nxt) { 111 *ret = 0; 112 return src; 113 } 114 src = src->nxt; 115 while (strcmp(src->key, request)) { 116 cache_pair *pre = src; 117 src = src->nxt; 118 if (!src) { /// not in table: return pre node 119 *ret = 0; 120 return pre; 121 } 122 } 123 *ret = 1; 124 return src; 125 } 126 127 void replace_after_src(cache_pair *src, const char *request) { 128 src = src->nxt; 129 src->cnt = 1; 130 gettimeofday(&src->pre_t, NULL); 131 src->key = src->key ? (char *) realloc(src->key, strlen(request) + 1) : RMALLOC(char, strlen(request) + 1); 132 strcpy(src->key, request); 133 int fd = open(request, O_RDONLY); 134 if (fd > 0) { 135 char *fp = mmap(NULL, MAX_BUFFER_LEN, PROT_READ, MAP_SHARED, fd, 0); 136 src->cost = strlen(fp) + 1; 137 src->file_cache = src->file_cache ? (char *) realloc(src->file_cache, src->cost) : RMALLOC(char, src->cost); 138 strcpy(src->file_cache, fp); 139 munmap(fp, MAX_BUFFER_LEN); 140 close(fd); 141 } else { 142 src->cost = -1; 143 if (src->file_cache)free(src->file_cache); 144 src->file_cache = NULL; 145 } 146 } 147 148 void add_after_src(cache_pair *src, const char *request) { 149 src->nxt = new_cache_pair(); 150 src->nxt->pre = src; 151 replace_after_src(src, request); 152 } 153 154 void replace_copy(cache_pair *src, cache_pair *aim) { 155 src = src->nxt; 156 src->cnt = aim->cnt; 157 gettimeofday(&src->pre_t, NULL); 158 src->cost = aim->cost; 159 free(src->key); 160 free(src->file_cache); 161 src->key = aim->key; 162 src->file_cache = aim->file_cache; 163 aim->pre->nxt = aim->nxt; 164 free(aim); 165 } 166 167 void add_copy(cache_pair *src, cache_pair *aim) { 168 src->nxt = new_cache_pair(); 169 src->nxt->pre = src; 170 replace_copy(src, aim); 171 } 172 173 cache_pair *LRU_CHOOSE(cache_pair *table) { 174 double mn = -1; 175 cache_pair *res = NULL; 176 for (int i = 0; i < HASHTABELSIZE; ++i) 177 if (table[i].nxt) { 178 cache_pair *ptr = table + i; 179 while (ptr->nxt) { 180 cache_pair *pre = ptr; 181 ptr = ptr->nxt; 182 double cur = ptr->pre_t.tv_sec * 1000.0 + ptr->pre_t.tv_usec / 1000.0; 183 if (mn < 0 || cur < mn) { 184 mn = cur; 185 res = pre; 186 } 187 } 188 } 189 return res; 190 } 191 192 cache_pair *LFU_CHOOSE(cache_pair *table) { 193 int mn = -1; 194 cache_pair *res = NULL; 195 for (int i = 0; i < HASHTABELSIZE; ++i) 196 if (table[i].nxt) { 197 cache_pair *ptr = table + i; 198 while (ptr->nxt) { 199 cache_pair *pre = ptr; 200 ptr = ptr->nxt; 201 int cur = ptr->cnt; 202 if (mn < 0 || cur < mn) { 203 mn = cur; 204 res = pre; 205 } 206 } 207 } 208 return res; 209 } 210 211 cache_pair *GD_CHOOSE(cache_pair *table) { 212 double mn = -1; 213 cache_pair *res = NULL; 214 for (int i = 0; i < HASHTABELSIZE; ++i) 215 if (table[i].nxt) { 216 cache_pair *ptr = table + i; 217 while (ptr->nxt) { 218 cache_pair *pre = ptr; 219 ptr = ptr->nxt; 220 double cur = ptr->cost + ptr->pre_t.tv_sec; 221 if (mn < 0 || cur < mn) { 222 mn = cur; 223 res = pre; 224 } 225 } 226 } 227 return res; 228 } 229 230 cache_pair *GDSF_CHOOSE(cache_pair *table) { 231 double mn = -1; 232 cache_pair *res = NULL; 233 for (int i = 0; i < HASHTABELSIZE; ++i) 234 if (table[i].nxt) { 235 cache_pair *ptr = table + i; 236 while (ptr->nxt) { 237 cache_pair *pre = ptr; 238 ptr = ptr->nxt; 239 double cur = ptr->cnt * ptr->cost + ptr->pre_t.tv_sec; 240 if (mn < 0 || cur < mn) { 241 mn = cur; 242 res = pre; 243 } 244 } 245 } 246 return res; 247 } 248 249 cache_ret LRU(cache_t set, const char *request) { 250 int flag; 251 cache_pair *src = is_in_table(set->table, request, &flag); 252 if (flag) { /// real node 253 src->cnt++; 254 gettimeofday(&src->pre_t, NULL); 255 } else { /// pre node 256 ++set->page_fault; 257 if (set->_cur == set->capacity) { /// choose and replace 258 src = LRU_CHOOSE(set->table); 259 replace_after_src(src, request); 260 } else { /// add node 261 add_after_src(src, request); 262 ++set->_cur; 263 } 264 src = src->nxt; 265 } 266 return (cache_ret) {src->cost, src->file_cache}; 267 } 268 269 cache_ret LFU(cache_t set, const char *request) { 270 int flag; 271 cache_pair *src = is_in_table(set->table, request, &flag); 272 if (flag) { 273 src->cnt++; 274 gettimeofday(&src->pre_t, NULL); 275 } else { 276 ++set->page_fault; 277 if (set->_cur == set->capacity) { 278 src = LFU_CHOOSE(set->table); 279 replace_after_src(src, request); 280 } else { 281 add_after_src(src, request); 282 ++set->_cur; 283 } 284 src = src->nxt; 285 } 286 return (cache_ret) {src->cost, src->file_cache}; 287 } 288 289 cache_ret ARC(cache_t set, const char *request) { 290 int flag; 291 cache_pair *first_table = set->first_table; 292 cache_pair *src = is_in_table(set->table, request, &flag); 293 if (flag) { /// in second table 294 src->cnt++; 295 gettimeofday(&src->pre_t, NULL); 296 } else { 297 cache_pair *first_src = is_in_table(first_table, request, &flag); 298 if (flag) { /// in first table 299 ++first_src->cnt; 300 if (set->_cur == set->capacity) { /// choose and replace 301 src = LRU_CHOOSE(set->table); 302 replace_copy(src, first_src); /// copy data to nxt src and delete first_src 303 } else { /// add node 304 add_copy(src, first_src); /// create node and replace 305 ++set->_cur; 306 } 307 src = src->nxt; 308 } else { /// not in first table 309 ++set->page_fault; 310 if (set->first_cur == set->capacity) { 311 first_src = LRU_CHOOSE(first_table); 312 replace_after_src(first_src, request); 313 } else { 314 add_after_src(first_src, request); 315 ++set->first_cur; 316 } 317 src = first_src->nxt; 318 } 319 } 320 return (cache_ret) {src->cost, src->file_cache}; 321 } 322 323 cache_ret MQ(cache_t set, const char *request) { 324 int flag; 325 cache_pair *first_table = set->first_table; 326 cache_pair *src = is_in_table(set->table, request, &flag); 327 if (flag) { /// in second table 328 src->cnt++; 329 gettimeofday(&src->pre_t, NULL); 330 } else { 331 cache_pair *first_src = is_in_table(first_table, request, &flag); 332 if (flag) { /// in first table 333 ++first_src->cnt; 334 if (set->_cur == set->capacity) { /// choose and replace 335 src = LRU_CHOOSE(set->table); 336 replace_copy(src, first_src); /// copy data to nxt src and delete first_src 337 } else { /// add node 338 add_copy(src, first_src); /// create node and replace 339 ++set->_cur; 340 } 341 src = src->nxt; 342 } else { /// not in first table 343 ++set->page_fault; 344 if (set->first_cur == set->capacity) { 345 first_src = LRU_CHOOSE(first_table); 346 replace_after_src(first_src, request); 347 } else { 348 add_after_src(first_src, request); 349 ++set->first_cur; 350 } 351 src = first_src->nxt; 352 } 353 } 354 return (cache_ret) {src->cost, src->file_cache}; 355 } 356 357 cache_ret GD(cache_t set, const char *request) { 358 int flag; 359 cache_pair *src = is_in_table(set->table, request, &flag); 360 if (flag) { 361 src->cnt++; 362 gettimeofday(&src->pre_t, NULL); 363 } else { 364 ++set->page_fault; 365 if (set->_cur == set->capacity) { 366 src = GD_CHOOSE(set->table); 367 replace_after_src(src, request); 368 } else { 369 add_after_src(src, request); 370 ++set->_cur; 371 } 372 src = src->nxt; 373 } 374 return (cache_ret) {src->cost, src->file_cache}; 375 } 376 377 cache_ret GDSF(cache_t set, const char *request) { 378 int flag; 379 cache_pair *src = is_in_table(set->table, request, &flag); 380 if (flag) { 381 src->cnt++; 382 gettimeofday(&src->pre_t, NULL); 383 } else { 384 ++set->page_fault; 385 if (set->_cur == set->capacity) { 386 src = GDSF_CHOOSE(set->table); 387 replace_after_src(src, request); 388 } else { 389 add_after_src(src, request); 390 ++set->_cur; 391 } 392 src = src->nxt; 393 } 394 return (cache_ret) {src->cost, src->file_cache}; 395 }
---版权:代码遵从MIT协议开源,可以按需使用。(地址:cache.c)
---转载需标明出处,侵权必究
---作者:RhythmLian: https://github.com/Rhythmicc
加载全部内容