/**
@file file_contexts.c
@brief Cache sent fs events
@details Copyright (c) 2023 Acronis International GmbH
@author Bruce Wang (bruce.wang@acronis.com)
@since $Id: $
*/
#include "compat.h"
#include "debug.h"
#include "file_contexts.h"
#include "file_contexts_priv.h"
#include "memory.h"
#include <linux/jiffies.h>
#ifndef list_first_entry_or_null
#define list_first_entry_or_null(ptr, type, member) (list_empty(ptr) ? NULL : list_first_entry(ptr, type, member))
#endif
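/*
Layout: global_fs_event_cache_manager holds one file_context_tables_t slot per
transport; writers serialize on writer_lock, readers use RCU and take a
reference on the entry they use. The last slot (MAX_TRANSPORT_EXTENDED_SIZE - 1)
is reserved for the shared file-modify cache (see acquire_file_modify_entry()).

Informal usage sketch (call sites live outside this file, so the exact sequence
below is an assumption, not the definitive caller code):

  acquire_file_context_entry(id);                  // publish a per-transport cache
  ...
  if (!check_open_cache(ids, &info)) {             // can the open event be skipped?
      add_open_cache(id, &info, &file, &proc);     // remember what was sent
      put_open_cache(file, proc);                  // drop the references
  }
  ...
  release_file_context_entry(id);                  // unpublish and drop the cache
*/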
static file_context_manager_t global_fs_event_cache_manager;
void file_contexts_init(void)
{
int i = 0;
spin_lock_init(&global_fs_event_cache_manager.writer_lock);
for (; i < MAX_TRANSPORT_EXTENDED_SIZE; i++)
{
global_fs_event_cache_manager.tables[i] = NULL;
}
}
static inline void free_common_node(file_context_common_node_t *common_node)
{
dummy_node_t *dummy = container_of(common_node, dummy_node_t, common_node);
if (common_node->pre_free_func)
{
common_node->pre_free_func(common_node);
}
mem_free(dummy);
}
static inline void deferred_free_common_node(struct rcu_head *head)
{
file_context_common_node_t *common_node = container_of(head, file_context_common_node_t, rcu);
free_common_node(common_node);
}
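/*
Reference counting for common nodes: malloc_node() starts a node at
ref_count = 1; code that inserts a node and keeps using it takes an extra
reference with get_common_node(). RCU readers must use get_common_node_rcu(),
which refuses nodes whose count has already dropped to zero, and the final
put_common_node() defers the actual free to an RCU grace period via call_rcu().
*/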
static inline void get_common_node(file_context_common_node_t *common_node)
{
atomic_inc(&common_node->ref_count);
}
static inline bool get_common_node_rcu(file_context_common_node_t *common_node)
{
return atomic_inc_not_zero(&common_node->ref_count);
}
static inline void put_common_node(file_context_common_node_t *common_node)
{
if (atomic_dec_and_test(&common_node->ref_count))
{
call_rcu(&common_node->rcu, deferred_free_common_node);
}
}
/* Caller must hold the table spinlock */
static inline void remove_common_node_from_lru(file_context_common_node_t *common_node)
{
list_del(&common_node->lru_list_node);
common_node->lru_list_node_inserted = false;
}
/* Caller must hold the table spinlock */
static inline void remove_and_put_common_node(file_context_common_node_t *common_node, file_context_common_table_t *common_table)
{
remove_common_node_from_lru(common_node);
hash_del_rcu(&common_node->hash_node);
common_table->size -= 1;
put_common_node(common_node);
}
static void clear_common_table(file_context_common_table_t *common_table)
{
file_context_common_node_t *common_node = NULL;
/* RCU WRITER */
spin_lock(&common_table->spinlock);
while (1)
{
common_node = list_first_entry_or_null(&common_table->lru_list, file_context_common_node_t, lru_list_node);
if (!common_node)
{
break;
}
remove_and_put_common_node(common_node, common_table);
}
spin_unlock(&common_table->spinlock);
/* RCU WRITER */
}
static void context_entry_clear(file_context_tables_t *tables)
{
DPRINTF("%s:open_table [%u] [%u]", __func__, tables->open_table.common_table.size,
tables->open_table.common_table.max_size);
clear_common_table(&tables->open_table.common_table);
DPRINTF("%s:open_table [%u] [%u]", __func__, tables->open_table.common_table.size,
tables->open_table.common_table.max_size);
DPRINTF("%s:read_table [%u] [%u]", __func__, tables->read_table.common_table.size,
tables->read_table.common_table.max_size);
clear_common_table(&tables->read_table.common_table);
DPRINTF("%s:read_table [%u] [%u]", __func__, tables->read_table.common_table.size,
tables->read_table.common_table.max_size);
DPRINTF("%s:write_table [%u] [%u]", __func__, tables->write_table.common_table.size,
tables->write_table.common_table.max_size);
clear_common_table(&tables->write_table.common_table);
DPRINTF("%s:write_table [%u] [%u]", __func__, tables->write_table.common_table.size,
tables->write_table.common_table.max_size);
}
static inline void deferred_free_context_entry(struct rcu_head *head)
{
file_context_tables_t *table = container_of(head, file_context_tables_t, rcu);
// perhaps this is excessive, but it is better to be safe
context_entry_clear(table);
vmem_free(table);
}
static inline void put_file_context_entry(file_context_tables_t *entry)
{
if (atomic_dec_and_test(&entry->ref_count))
{
call_rcu(&entry->rcu, deferred_free_context_entry);
}
}
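/*
Look up the table entry for a transport under RCU and take a reference on it.
Returns NULL when the slot is empty, the stored transport_id does not match,
or the entry is already being torn down (ref_count reached zero). The caller
must release the entry with put_file_context_entry().
*/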
static inline file_context_tables_t *get_file_context_entry(transport_id_t transport_id)
{
file_context_tables_t *entry;
int idx = transport_id_index(transport_id);
if (idx < 0 || idx >= MAX_TRANSPORT_EXTENDED_SIZE)
{
return NULL;
}
rcu_read_lock();
entry = rcu_dereference(global_fs_event_cache_manager.tables[idx]);
if (entry) {
if (entry->transport_id != transport_id) {
DPRINTF("Transport id mismatch: %llu != %llu", entry->transport_id, transport_id);
entry = NULL;
} else {
if (!atomic_inc_not_zero(&entry->ref_count)) {
entry = NULL;
}
}
}
rcu_read_unlock();
return entry;
}
// This function does not verify the transport_id
static inline file_context_tables_t *get_file_context_entry_by_idx(int idx)
{
file_context_tables_t *entry;
if (idx < 0 || idx >= MAX_TRANSPORT_EXTENDED_SIZE)
{
return NULL;
}
rcu_read_lock();
entry = rcu_dereference(global_fs_event_cache_manager.tables[idx]);
if (entry) {
if (!atomic_inc_not_zero(&entry->ref_count)) {
entry = NULL;
}
}
rcu_read_unlock();
return entry;
}
void release_file_context_entry(transport_id_t id)
{
file_context_tables_t* table = NULL;
int idx = transport_id_index(id);
spin_lock(&global_fs_event_cache_manager.writer_lock);
if (global_fs_event_cache_manager.tables[idx])
{
if (global_fs_event_cache_manager.tables[idx]->transport_id == id) {
table = global_fs_event_cache_manager.tables[idx];
rcu_assign_pointer(global_fs_event_cache_manager.tables[idx], NULL);
} else {
WPRINTF("release_file_context_entry: %d, id mismatch: %llu, %llu", idx, global_fs_event_cache_manager.tables[idx]->transport_id, id);
}
}
spin_unlock(&global_fs_event_cache_manager.writer_lock);
if (table) {
put_file_context_entry(table);
}
IPRINTF("release_file_context_entry: %llu\n", id);
}
void file_contexts_deinit(void)
{
int i = 0;
spin_lock(&global_fs_event_cache_manager.writer_lock);
for (; i < MAX_TRANSPORT_EXTENDED_SIZE; i++)
{
if (global_fs_event_cache_manager.tables[i])
{
put_file_context_entry(global_fs_event_cache_manager.tables[i]);
global_fs_event_cache_manager.tables[i] = NULL;
}
}
spin_unlock(&global_fs_event_cache_manager.writer_lock);
}
static void init_file_context_common_table(file_context_common_table_t *table, struct hlist_head *hashtable_head,
uint8_t hashbits, unsigned int max_size, unsigned short clean_count)
{
table->hashtable = hashtable_head;
INIT_LIST_HEAD(&table->lru_list);
spin_lock_init(&table->spinlock);
table->hashbits = hashbits;
table->max_size = max_size;
table->clean_count = clean_count;
table->size = 0;
}
static file_context_tables_t* init_file_context_entry(transport_id_t id)
{
file_context_tables_t* tables = vmem_alloc(sizeof(file_context_tables_t));
if (!tables)
{
return NULL;
}
*tables = (file_context_tables_t){0};
tables->transport_id = id;
atomic_set(&tables->ref_count, 1);
hash_init(tables->open_table.hashtable);
init_file_context_common_table(&tables->open_table.common_table, tables->open_table.hashtable,
FILE_CONTEXT_BIG_TABLE_SIZE_BITS, FILE_CONTEXT_BIG_TBALE_SIZE, FILE_CONTEXT_BIG_TABLE_LRU_CLEAN_SIZE);
hash_init(tables->read_table.hashtable);
init_file_context_common_table(&tables->read_table.common_table, tables->read_table.hashtable,
FILE_CONTEXT_BIG_TABLE_SIZE_BITS, FILE_CONTEXT_BIG_TBALE_SIZE, FILE_CONTEXT_BIG_TABLE_LRU_CLEAN_SIZE);
hash_init(tables->write_table.hashtable);
init_file_context_common_table(&tables->write_table.common_table, tables->write_table.hashtable,
FILE_CONTEXT_BIG_TABLE_SIZE_BITS, FILE_CONTEXT_BIG_TBALE_SIZE, FILE_CONTEXT_BIG_TABLE_LRU_CLEAN_SIZE);
return tables;
}
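/*
Allocate a table entry for the transport and publish it in the global manager.
Returns -ENOMEM when allocation fails and -EEXIST when the slot is already
occupied; the pre-allocated entry is dropped in the latter case.
*/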
int acquire_file_context_entry(transport_id_t id)
{
int err = 0;
int idx = transport_id_index(id);
file_context_tables_t *entry = init_file_context_entry(id);
if (!entry) {
return -ENOMEM;
}
spin_lock(&global_fs_event_cache_manager.writer_lock);
if (global_fs_event_cache_manager.tables[idx])
{
WPRINTF("acquire_file_context_entry: %d, already exists", idx);
err = -EEXIST;
} else {
rcu_assign_pointer(global_fs_event_cache_manager.tables[idx], entry);
entry = NULL;
}
spin_unlock(&global_fs_event_cache_manager.writer_lock);
if (entry)
{
put_file_context_entry(entry);
}
return err;
}
/* Caller must hold the table spinlock */
static inline void check_common_table_lru(file_context_common_table_t *common_table, unsigned int max_size, unsigned short clean_count)
{
file_context_common_node_t *common_node;
unsigned long now;
// Remove nodes if LRU list is too large
if (common_table->size > max_size)
{
while (clean_count)
{
common_node = list_first_entry_or_null(&common_table->lru_list, file_context_common_node_t, lru_list_node);
if (!common_node)
{
break;
}
remove_and_put_common_node(common_node, common_table);
clean_count--;
}
}
// Clean expired nodes
now = jiffies;
while (clean_count)
{
common_node = list_first_entry_or_null(&common_table->lru_list, file_context_common_node_t, lru_list_node);
if (!common_node)
{
break;
}
// wrap-safe jiffies comparison: stop once the oldest node has not yet expired
if (time_before(now, common_node->last_access_time + msecs_to_jiffies(FILE_CONTEXT_EXPIRE_TIME_MS)))
{
break;
}
remove_and_put_common_node(common_node, common_table);
clean_count--;
}
}
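/*
Insert a node into the table: the node is appended to the LRU tail and added
to the hash table. If a node with the same hash key already exists it is
replaced (the old node is unlinked and put); otherwise the table grows and
the LRU is trimmed by check_common_table_lru().
*/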
static void insert_common_node(file_context_common_table_t *common_table,
file_context_common_node_t *common_node,
uint64_t key,
uint8_t hashbits)
{
file_context_common_node_t *search_node;
bool inserted = false;
common_node->key = key;
/* RCU WRITER */
spin_lock(&common_table->spinlock);
list_add_tail(&common_node->lru_list_node, &common_table->lru_list);
common_node->lru_list_node_inserted = true;
// iterate through the hash bucket looking for the same key; if it already exists, prefer the new node
hash_for_each_possible_with_hashbits(common_table->hashtable, search_node, hash_node, key, hashbits)
{
if (common_node->key == search_node->key)
{
// TODO: ideally the full file_key would be compared here as well, but this approach is good enough
remove_common_node_from_lru(search_node);
hlist_replace_rcu(&search_node->hash_node, &common_node->hash_node);
put_common_node(search_node);
inserted = true;
break;
}
}
if (!inserted)
{
hash_add_rcu_hashbits(common_table->hashtable, &common_node->hash_node, key, hashbits);
common_table->size += 1;
check_common_table_lru(common_table, common_table->max_size, common_table->clean_count);
}
spin_unlock(&common_table->spinlock);
/* RCU WRITER */
}
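/*
Allocate a zeroed node of the requested size, stamp its last_access_time with
the current jiffies and start its reference count at 1.
*/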
static void *malloc_node(size_t size)
{
dummy_node_t *node = NULL;
void *mem = mem_alloc0(size);
if (!mem)
{
return NULL;
}
node = (dummy_node_t *)mem;
node->common_node.last_access_time = jiffies;
atomic_set(&node->common_node.ref_count, 1);
return mem;
}
static inline file_context_common_node_t *find_hash_node(struct hlist_head *head, uint64_t key, uint8_t hashbits)
{
file_context_common_node_t *tmp = NULL;
hash_for_each_possible_with_hashbits(head, tmp, hash_node, key, hashbits)
{
if (tmp->key == key)
{
return tmp;
}
}
return NULL;
}
static inline file_context_common_node_t *find_hash_node_rcu(struct hlist_head *head, uint64_t key, uint8_t hashbits)
{
file_context_common_node_t *tmp = NULL;
hash_for_each_possible_rcu_with_hashbits(head, tmp, hash_node, key, hashbits)
{
if (tmp->key == key)
{
return tmp;
}
}
return NULL;
}
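/*
Look up a node by key under RCU and take a reference on it. On a hit the
node's last_access_time is refreshed and the node is moved to the LRU tail.
The caller must release the node with put_common_node().
*/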
static inline file_context_common_node_t *lookup_common_node(file_context_common_table_t *table, uint64_t key, uint8_t hashbits)
{
file_context_common_node_t *common_node = NULL;
/* RCU READER */
rcu_read_lock();
common_node = find_hash_node_rcu(table->hashtable, key, hashbits);
if (common_node)
{
if (!get_common_node_rcu(common_node)) {
common_node = NULL;
}
}
rcu_read_unlock();
/* RCU READER */
if (common_node)
{
spin_lock(&table->spinlock);
common_node->last_access_time = jiffies;
if (common_node->lru_list_node_inserted) {
list_del(&common_node->lru_list_node);
list_add_tail(&common_node->lru_list_node, &table->lru_list);
}
spin_unlock(&table->spinlock);
}
return common_node;
}
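/*
Run lookup_common_node() for every non-zero transport id in 'ids' against the
table of the requested type. common_nodes[] receives one referenced node (or
NULL) per slot; *found_all is cleared when any transport has no table entry or
no matching node.
*/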
static void lookup_common_node_all(file_context_table_type_t type
, uint64_t key
, file_context_common_node_t **common_nodes
, const transport_ids_t* ids
, bool* found_all)
{
int idx = 0;
*found_all = true;
for (; idx < MAX_TRANSPORT_SIZE; idx++)
{
transport_id_t transport_id = ids->ids[idx];
file_context_tables_t *tables;
if (!transport_id)
continue;
tables = get_file_context_entry(transport_id);
common_nodes[idx] = NULL;
if (tables)
{
file_context_common_node_t *common_node = NULL;
switch (type)
{
case FILE_CONTEXT_OPEN_TABLE:
common_node = lookup_common_node(&tables->open_table.common_table, key, tables->open_table.common_table.hashbits);
break;
case FILE_CONTEXT_READ_TABLE:
common_node = lookup_common_node(&tables->read_table.common_table, key, tables->read_table.common_table.hashbits);
break;
case FILE_CONTEXT_WRITE_TABLE:
common_node = lookup_common_node(&tables->write_table.common_table, key, tables->write_table.common_table.hashbits);
break;
}
if (common_node)
{
common_nodes[idx] = common_node;
} else {
*found_all = false;
}
put_file_context_entry(tables);
} else {
*found_all = false;
}
}
}
static int remove_common_node_by_key(file_context_common_table_t *common_table, uint64_t key)
{
file_context_common_node_t *common_node = NULL;
/* RCU WRITER */
spin_lock(&common_table->spinlock);
common_node = find_hash_node(common_table->hashtable, key, common_table->hashbits);
if (common_node)
{
if (common_node->lru_list_node_inserted) {
list_del(&common_node->lru_list_node);
common_node->lru_list_node_inserted = false;
}
hash_del_rcu(&common_node->hash_node);
common_table->size -= 1;
}
spin_unlock(&common_table->spinlock);
/* RCU WRITER */
if (common_node)
{
put_common_node(common_node);
return 0;
}
return -ENOENT;
}
// This function must be called with common_table->spinlock held.
// The caller is responsible for putting the returned common_node.
static file_context_common_node_t * remove_common_node_by_key_no_lock(file_context_common_table_t *common_table, uint64_t key)
{
file_context_common_node_t *common_node = NULL;
common_node = find_hash_node(common_table->hashtable, key, common_table->hashbits);
if (common_node)
{
if (common_node->lru_list_node_inserted) {
list_del(&common_node->lru_list_node);
common_node->lru_list_node_inserted = false;
}
hash_del_rcu(&common_node->hash_node);
common_table->size -= 1;
}
return common_node;
}
static int remove_common_cache(int idx, uint64_t key, file_context_table_type_t type)
{
int ret = 0;
file_context_tables_t *tables = get_file_context_entry_by_idx(idx);
if (!tables)
{
return -ENOENT;
}
switch (type)
{
case FILE_CONTEXT_OPEN_TABLE:
ret = remove_common_node_by_key(&tables->open_table.common_table, key);
break;
case FILE_CONTEXT_READ_TABLE:
ret = remove_common_node_by_key(&tables->read_table.common_table, key);
break;
case FILE_CONTEXT_WRITE_TABLE:
ret = remove_common_node_by_key(&tables->write_table.common_table, key);
break;
}
if (ret == 0)
{
DPRINTF("remove_common_cache[%d]: %llu", idx, key);
}
put_file_context_entry(tables);
return ret;
}
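/*
Drop every cached open/read/write node keyed by this file from all slots of
the global manager, including the file-modify slot.
*/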
void remove_common_cache_all(const file_key_t* file_key)
{
uint64_t key = file_key->ptr;
int i = 0;
for (; i < MAX_TRANSPORT_EXTENDED_SIZE; i++)
{
remove_common_cache(i, key, FILE_CONTEXT_OPEN_TABLE);
remove_common_cache(i, key, FILE_CONTEXT_READ_TABLE);
remove_common_cache(i, key, FILE_CONTEXT_WRITE_TABLE);
}
}
static inline int cmp_file_context_key(const file_context_key_t *cache_key, const file_context_key_t *current_key)
{
if (cache_key->file_key.ptr != current_key->file_key.ptr ||
cache_key->file_key.ino != current_key->file_key.ino ||
cache_key->file_key.gen != current_key->file_key.gen ||
cache_key->file_key.dev != current_key->file_key.dev)
{
return -1;
}
return 0;
}
static inline int cmp_file_context_update_time(const file_context_key_t *cache_key, const file_context_key_t *current_key)
{
if (cache_key->i_mtime.tv_sec != current_key->i_mtime.tv_sec ||
cache_key->i_mtime.tv_nsec != current_key->i_mtime.tv_nsec ||
cache_key->i_ctime.tv_sec != current_key->i_ctime.tv_sec ||
cache_key->i_ctime.tv_nsec != current_key->i_ctime.tv_nsec)
{
return -1;
}
return 0;
}
static inline int cmp_file_context(const file_context_key_t *cache_key, const file_context_key_t *current_key)
{
if (cmp_file_context_key(cache_key, current_key) == 0 &&
cmp_file_context_update_time(cache_key, current_key) == 0)
{
return 0;
}
return -1;
}
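/*
Returns true when the open event can be skipped for every requested transport:
each transport must already hold a matching file node (key and timestamps) with
a process node whose cached flags cover info->flags. Transports that may skip
the event are recorded in info->msg_info.skipped_transport_ids.
*/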
bool check_open_cache(const transport_ids_t* ids, file_context_info_t *info)
{
file_context_common_node_t *common_nodes[MAX_TRANSPORT_SIZE] = {0};
bool skip;
int id = 0;
// 'skip' starts as 'found_all': if any transport is missing a cached node, skip is false
lookup_common_node_all(FILE_CONTEXT_OPEN_TABLE, info->msg_info.key.file_key.ptr, common_nodes, ids, &skip);
for (id = 0; id < MAX_TRANSPORT_SIZE; id++)
{
file_context_open_file_node_t *file_node = NULL;
file_context_open_process_node_t *process_node = NULL;
file_context_common_node_t *common_node = NULL;
if (common_nodes[id] == NULL)
{
continue;
}
file_node = (file_context_open_file_node_t *)container_of(common_nodes[id], file_context_open_file_node_t, common_node);
// for open event caching, make sure that both the file key and the modification times match
if (cmp_file_context(&file_node->key, &info->msg_info.key) != 0)
{
skip = false;
put_common_node(&file_node->common_node);
continue;
}
common_node = lookup_common_node(&file_node->process_table.common_table, info->pid_version, file_node->process_table.common_table.hashbits);
if (!common_node)
{
skip = false;
put_common_node(&file_node->common_node);
continue;
}
process_node = (file_context_open_process_node_t *)container_of(common_node, file_context_open_process_node_t, common_node);
if ((info->flags & atomic_read(&process_node->data.flags)) == info->flags)
{
info->msg_info.skipped_transport_ids[id] = ids->ids[id];
}
else
{
skip = false;
}
put_common_node(&file_node->common_node);
put_common_node(&process_node->common_node);
}
return skip;
}
static inline file_context_open_process_node_t *add_open_process_node(file_context_common_table_t *table,
const file_context_info_t *info)
{
file_context_open_process_node_t *open_process_node = NULL;
open_process_node = (file_context_open_process_node_t *)malloc_node(sizeof(file_context_open_process_node_t));
if (open_process_node)
{
get_common_node(&open_process_node->common_node);
atomic_set(&open_process_node->data.flags, 0);
insert_common_node(table, &open_process_node->common_node, info->pid_version, table->hashbits);
}
return open_process_node;
}
static inline void open_file_node_pre_free(void *common_node)
{
file_context_open_file_node_t *file_node =
(file_context_open_file_node_t *)container_of(common_node, file_context_open_file_node_t, common_node);
clear_common_table(&file_node->process_table.common_table);
}
static inline file_context_open_file_node_t *add_open_file_node(file_context_common_table_t *table,
const file_context_info_t *info,
file_context_open_process_node_t **target_process)
{
file_context_open_file_node_t *open_file_node = NULL;
file_context_open_process_node_t *open_process_node = NULL;
open_file_node = (file_context_open_file_node_t *)malloc_node(sizeof(file_context_open_file_node_t));
if (!open_file_node)
{
return NULL;
}
get_common_node(&open_file_node->common_node);
open_file_node->common_node.pre_free_func = open_file_node_pre_free;
hash_init(open_file_node->process_table.hashtable);
open_file_node->key = info->msg_info.key;
init_file_context_common_table(&open_file_node->process_table.common_table, open_file_node->process_table.hashtable,
FILE_CONTEXT_SMALL_TABLE_SIZE_BITS, FILE_CONTEXT_SMALL_TABLE_MAX_SIZE, FILE_CONTEXT_SMALL_TABLE_LRU_CLEAN_SIZE);
insert_common_node(table, &open_file_node->common_node, info->msg_info.key.file_key.ptr, table->hashbits);
open_process_node = add_open_process_node(&open_file_node->process_table.common_table, info);
if (open_process_node)
{
*target_process = open_process_node;
}
return open_file_node;
}
static int add_open_node(file_context_big_table_t *table,
const file_context_info_t *info,
file_context_open_file_node_t **file_node, file_context_open_process_node_t **process_node)
{
file_context_open_file_node_t *tmp_file_node = NULL;
file_context_open_process_node_t *tmp_process_node = NULL;
file_context_common_node_t *common_node = NULL;
common_node = lookup_common_node(&table->common_table, info->msg_info.key.file_key.ptr, table->common_table.hashbits);
if (common_node)
{
tmp_file_node = (file_context_open_file_node_t *)container_of(common_node, file_context_open_file_node_t, common_node);
// If the file_key mismatches, drop the reference and 'forget' that we found a common_node; a fresh node is inserted below
if (cmp_file_context(&tmp_file_node->key, &info->msg_info.key) != 0)
{
put_common_node(common_node);
common_node = NULL;
}
}
if (common_node)
{
tmp_file_node = (file_context_open_file_node_t *)container_of(common_node, file_context_open_file_node_t, common_node);
common_node = lookup_common_node(&tmp_file_node->process_table.common_table, info->pid_version, tmp_file_node->process_table.common_table.hashbits);
if (common_node)
{
tmp_process_node = (file_context_open_process_node_t *)container_of(common_node, file_context_open_process_node_t, common_node);
}
else
{
tmp_process_node = add_open_process_node(&tmp_file_node->process_table.common_table, info);
}
}
else
{
tmp_file_node = add_open_file_node(&table->common_table, info, &tmp_process_node);
}
*file_node = tmp_file_node;
*process_node = tmp_process_node;
if (*file_node && *process_node)
{
return 0;
}
return -ENOENT;
}
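/*
Record a sent open event for the given transport. The returned file/process
data pointers carry references; the caller must drop whatever non-NULL
pointers it received with put_open_cache().
*/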
int add_open_cache(transport_id_t id, const file_context_info_t *info,
file_context_open_file_t **file_node_data, file_context_open_process_t **process_node_data)
{
int ret = 0;
file_context_open_file_node_t* file_node = NULL;
file_context_open_process_node_t* process_node = NULL;
file_context_tables_t *tables = get_file_context_entry(id);
if (!tables)
{
return -ENOENT;
}
ret = add_open_node(&tables->open_table, info, &file_node, &process_node);
if (ret == 0)
{
DPRINTF("add_open_cache[%llu]: %llu, %llu", id, info->msg_info.key.file_key.ptr, info->pid_version);
}
else
{
EPRINTF("add_open_cache failed[%llu]: %llu, %llu", id, info->msg_info.key.file_key.ptr, info->pid_version);
}
put_file_context_entry(tables);
*file_node_data = file_node ? &file_node->data : NULL;
*process_node_data = process_node ? &process_node->data : NULL;
return ret;
}
void put_open_cache(file_context_open_file_t *file_node_data, file_context_open_process_t *process_node_data)
{
file_context_open_file_node_t *file_node;
file_context_open_process_node_t *process_node;
if (file_node_data)
{
file_node = container_of(file_node_data, file_context_open_file_node_t, data);
put_common_node(&file_node->common_node);
}
if (process_node_data)
{
process_node = container_of(process_node_data, file_context_open_process_node_t, data);
put_common_node(&process_node->common_node);
}
}
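/*
Interval set helpers: the read/write caches track which byte ranges of a file
have been accessed as a set of disjoint, inclusive [low, high] intervals in an
rb-tree, so "was this range already seen" and "is the whole file covered"
checks stay cheap.
*/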
/* Caller must hold the lock protecting the interval set */
static interval_node_t *malloc_interval_node(uint64_t low, uint64_t high, interval_set_t *set)
{
interval_node_t *node = mem_alloc0(sizeof(interval_node_t));
if (!node)
{
return NULL;
}
RB_CLEAR_NODE(&node->rb);
node->low = low;
node->high = high;
set->interval_count++;
#ifdef INTERVAL_SET_DEBUG
set->total_interval_size += (node->high - node->low);
#endif
return node;
}
/* Caller must hold the lock protecting the interval set */
static void remove_interval_node(struct rb_node *rb_node, interval_set_t *set)
{
interval_node_t *node;
if (!rb_node)
{
return;
}
node = rb_entry(rb_node, interval_node_t, rb);
set->interval_count--;
#ifdef INTERVAL_SET_DEBUG
set->total_interval_size -= (node->high - node->low);
#endif
rb_erase(rb_node, &set->root);
mem_free(node);
}
/* Caller must hold the lock protecting the interval set */
void clean_interval_tree(interval_set_t *set)
{
struct rb_node *rb_node = set->root.rb_node;
while (rb_node)
{
remove_interval_node(rb_node, set);
rb_node = set->root.rb_node;
}
}
// returns true if node fully contains the inclusive interval [low, high]
static bool contain(interval_node_t *node, uint64_t low, uint64_t high)
{
if (node->low <= low && high <= node->high)
return true;
return false;
}
#ifndef list_last_entry
#define list_last_entry(ptr, type, member) \
list_entry((ptr)->prev, type, member)
#endif
/* Caller must hold the lock protecting the interval set.
For each existing node and the new node, the possible situations are:
1. node contains new node: return true
2. new node contains node: remove the overlapped node, check left and right
3. new node is entirely less/greater than node: check left/right
4. new node overlaps node on the left/right: extend new node's low/high, remove node, check left/right
static bool check_overlap(uint64_t *low, uint64_t *high, struct rb_root *root, struct list_head *del_list)
{
interval_node_t *cur, *next;
struct list_head stack;
struct rb_node *rb_node;
INIT_LIST_HEAD(&stack);
rb_node = root->rb_node;
if (!rb_node)
{
return false;
}
cur = rb_entry(rb_node, interval_node_t, rb);
list_add_tail(&cur->stack_node, &stack);
while (!list_empty(&stack))
{
cur = list_last_entry(&stack, interval_node_t, stack_node);
list_del(&cur->stack_node);
// assume that s0 is current node, s1 is new node
// current node contains new node
/*
tree:
(14,20)
(4,7) (21,22)
(1,3) (9,13)
stack: (14,20)
new node: (17,18)
(17,18) is contained by (14,20)
____s0---s1=s1--s0____
14 17 18 20
do nothing, return true
*/
if (contain(cur, *low, *high))
{
return true;
}
// new node contains current node
/*
tree:
(14,17)
(4,7) (18,19)
(1,3) (9,13)
stack: (14,17)
new node: (10,20)
(10,20) contains (14,17)
____s1====s0---s0===s1____
10 14 17 20
after operation:
deleted list: (14,17)
stack: (18,19), (4,7)
*/
if ((*low < cur->low) && (*high > cur->high))
{
list_add_tail(&cur->del_list_node, del_list);
if (cur->rb.rb_right)
{
next = rb_entry(cur->rb.rb_right, interval_node_t, rb);
list_add_tail(&next->stack_node, &stack);
}
if (cur->rb.rb_left)
{
next = rb_entry(cur->rb.rb_left, interval_node_t, rb);
list_add_tail(&next->stack_node, &stack);
}
continue;
}
// new node is less than current node
/*
tree:
(4,7)
(1,3) (9,13)
stack: (4,7)
new node: (0,2)
(0,2) is less than (4,7)
____s1==s1__s0---s0____
0 2 4 7
after operation:
deleted list:
stack: (1,3)
*/
if (*high < cur->low)
{
if (cur->rb.rb_left)
{
next = rb_entry(cur->rb.rb_left, interval_node_t, rb);
list_add_tail(&next->stack_node, &stack);
}
continue;
}
// new node is left overlapped with current node
/*
tree:
(4,8)
(1,3) (9,13)
stack: (4,8)
new node: (2,6)
(2,6) is left overlapped with (4,8)
____s1==s0xxs1--s0____
2 4 6 8
after operation:
new node->(2, 8)
deleted list: (4,8)
stack: (1,3)
*/
else if (*high <= cur->high)
{
list_add_tail(&cur->del_list_node, del_list);
*high = cur->high;
if (cur->rb.rb_left)
{
next = rb_entry(cur->rb.rb_left, interval_node_t, rb);
list_add_tail(&next->stack_node, &stack);
}
continue;
}
// new node is greater than node
/*
tree:
(10,17)
(4,7) (18,19)
stack: (10,17)
new node: (20,30)
(20,30) is greater than (10,17)
____s0-------s0___s1=========s1____
10 17 20 30
after operation:
deleted list:
stack: (18,19)
*/
if (*low > cur->high)
{
if (cur->rb.rb_right)
{
next = rb_entry(cur->rb.rb_right, interval_node_t, rb);
list_add_tail(&next->stack_node, &stack);
}
continue;
}
// new node is right overlapped with node
/*
tree:
(10,17)
(4,7) (18,19)
stack: (10,17)
new node: (15,30)
(15,30) is right overlapped with (10,17)
s1
____s0-----s1xxs0=========s1____
10 15 17 30
after operation:
new node->(10, 30)
deleted list:(10,17)
stack: (18,19)
*/
else if (cur->low <= *low)
{
list_add_tail(&cur->del_list_node, del_list);
*low = cur->low;
if (cur->rb.rb_right)
{
next = rb_entry(cur->rb.rb_right, interval_node_t, rb);
list_add_tail(&next->stack_node, &stack);
}
continue;
}
EPRINTF("%s: unexpected interval relation", __func__);
}
return false;
}
/* Caller must hold the lock protecting the interval set.
1. find the overlapping intervals
2. remove the overlapping intervals
3. insert the new (possibly extended) interval
*/
bool insert_interval(uint64_t low, uint64_t high, interval_set_t *set)
{
struct rb_node **new, *parent = NULL;
interval_node_t *new_node, *cur_node;
struct list_head del_list;
uint64_t l = low, h = high;
INIT_LIST_HEAD(&del_list);
if (check_overlap(&l, &h, &set->root, &del_list))
{
return true;
}
new_node = malloc_interval_node(l, h, set);
if (!new_node)
{
return false;
}
while (!list_empty(&del_list))
{
// erase from leaf node
cur_node = list_last_entry(&del_list, interval_node_t, del_list_node);
list_del(&cur_node->del_list_node);
remove_interval_node(&cur_node->rb, set);
}
new = &set->root.rb_node;
while (*new)
{
parent = *new;
cur_node = rb_entry(parent, interval_node_t, rb);
if (new_node->high < cur_node->low)
new = &parent->rb_left;
else
new = &parent->rb_right;
}
rb_link_node(&new_node->rb, parent, new);
rb_insert_color(&new_node->rb, &set->root);
return true;
}
/* Caller must hold the lock protecting the interval set.
1. a node contains the queried interval: return true
2. the interval is entirely less/greater than the node: check left/right
3. the interval overlaps the node or contains it: return false
*/
bool check_interval(uint64_t low, uint64_t high, interval_set_t *set)
{
interval_node_t *cur;
struct rb_node *rb_node = set->root.rb_node;
while (rb_node)
{
cur = rb_entry(rb_node, interval_node_t, rb);
if (contain(cur, low, high))
return true;
if (high < cur->low)
{
rb_node = rb_node->rb_left;
}
else if (low > cur->high)
{
rb_node = rb_node->rb_right;
}
else
{
// overlapped
return false;
}
}
return false;
}
static file_context_rw_node_t *add_rw_cache_node(transport_id_t id, file_context_info_t *info, file_context_table_type_t type);
/*
Send read events only the first time the file is fully read
The interval will be reset when the file is changed
Skip event when return is true.
*/
bool check_and_update_read_cache(const transport_ids_t* ids, file_context_info_t *info)
{
file_context_common_node_t *common_nodes[MAX_TRANSPORT_SIZE] = {0};
bool skip = true;
bool found_all = false;
int id = 0;
loff_t file_size = i_size_read((const struct inode *)info->msg_info.key.file_key.ptr);
if (file_size < 0 || file_size > FILE_CONTEXT_MAX_FILE_SIZE)
{
return true;
}
lookup_common_node_all(FILE_CONTEXT_READ_TABLE, info->msg_info.key.file_key.ptr, common_nodes, ids, &found_all);
for (id = 0; id < MAX_TRANSPORT_SIZE; id++)
{
transport_id_t transport_id = ids->ids[id];
file_context_rw_node_t *read_node = NULL;
// By default skip the read event
info->msg_info.skipped_transport_ids[id] = transport_id;
if (common_nodes[id])
{
read_node = (file_context_rw_node_t *)container_of(common_nodes[id], file_context_rw_node_t, common_node);
if (cmp_file_context_key(&read_node->key, &info->msg_info.key) != 0)
{
put_common_node(common_nodes[id]);
common_nodes[id] = NULL;
read_node = NULL;
}
}
if (common_nodes[id] == NULL)
{
read_node = add_rw_cache_node(ids->ids[id], info, FILE_CONTEXT_READ_TABLE);
if (read_node)
{
common_nodes[id] = &read_node->common_node;
}
}
if (common_nodes[id] == NULL)
{
continue;
}
spin_lock(&read_node->data.spinlock);
if (read_node->data.interval_set.interval_count < (uint64_t)file_size)
{
insert_interval(info->low, info->high, &read_node->data.interval_set);
}
if (check_interval(0, file_size, &read_node->data.interval_set))
{
if (atomic_cmpxchg(&read_node->data.is_reported, false, true) == false)
{
// report the full read exactly once by clearing the skipped flag for this transport
info->msg_info.skipped_transport_ids[id] = 0;
skip = false;
}
}
spin_unlock(&read_node->data.spinlock);
put_common_node(common_nodes[id]);
}
return skip;
}
// Skip event when return is true.
bool check_write_cache(const transport_ids_t* ids, file_context_info_t *info, file_context_table_type_t type)
{
file_context_common_node_t *common_nodes[MAX_TRANSPORT_SIZE] = {0};
bool skip;
int id = 0;
lookup_common_node_all(type, info->msg_info.key.file_key.ptr, common_nodes, ids, &skip);
for (id = 0; id < MAX_TRANSPORT_SIZE; id++)
{
file_context_rw_node_t *write_node = NULL;
if (common_nodes[id] == NULL)
{
continue;
}
write_node = (file_context_rw_node_t *)container_of(common_nodes[id], file_context_rw_node_t, common_node);
if (cmp_file_context_key(&write_node->key, &info->msg_info.key) == 0)
{
spin_lock(&write_node->data.spinlock);
if (check_interval(info->low, info->high, &write_node->data.interval_set))
{
info->msg_info.skipped_transport_ids[id] = ids->ids[id];
}
else
{
skip = false;
}
spin_unlock(&write_node->data.spinlock);
}
else
{
skip = false;
}
put_common_node(common_nodes[id]);
}
return skip;
}
static inline void rw_node_pre_free(void *common_node)
{
file_context_rw_node_t *rw_node =
(file_context_rw_node_t *)container_of(common_node, file_context_rw_node_t, common_node);
spin_lock(&rw_node->data.spinlock);
clean_interval_tree(&rw_node->data.interval_set);
spin_unlock(&rw_node->data.spinlock);
}
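/*
Look up (or create) the read/write node for the file and merge the
[low, high] range (clamped to the current file size) into its interval set.
Returns NULL if the file is too large or the range lies beyond the current
file size; otherwise the returned node carries a reference that the caller
must drop with put_common_node().
*/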
static file_context_rw_node_t *add_rw_node(file_context_big_table_t *table,
file_context_info_t *info)
{
file_context_rw_node_t *node = NULL;
file_context_common_node_t *common_node = NULL;
loff_t file_size = i_size_read((const struct inode *)info->msg_info.key.file_key.ptr);
if (file_size < 0 || file_size > FILE_CONTEXT_MAX_FILE_SIZE || info->low > (uint64_t) file_size)
{
return NULL;
}
if (info->high > (uint64_t) file_size)
{
info->high = (uint64_t)file_size;
}
common_node = lookup_common_node(&table->common_table, info->msg_info.key.file_key.ptr, table->common_table.hashbits);
if (common_node)
{
node = (file_context_rw_node_t *)container_of(common_node, file_context_rw_node_t, common_node);
// For 'read' and 'write' events, timestamp changes are not important;
// what matters is that a particular region was accessed.
if (cmp_file_context_key(&node->key, &info->msg_info.key) != 0)
{
put_common_node(common_node);
node = NULL;
}
}
if (!node)
{
node = (file_context_rw_node_t *)malloc_node(sizeof(file_context_rw_node_t));
if (node)
{
get_common_node(&node->common_node);
node->common_node.pre_free_func = rw_node_pre_free;
node->key = info->msg_info.key;
spin_lock_init(&node->data.spinlock);
node->data.interval_set.root = RB_ROOT;
insert_common_node(&table->common_table, &node->common_node, info->msg_info.key.file_key.ptr, table->common_table.hashbits);
}
}
if (!node)
{
return NULL;
}
spin_lock(&node->data.spinlock);
if (node->data.interval_set.interval_count < (uint64_t)file_size)
{
insert_interval(info->low, info->high, &node->data.interval_set);
}
spin_unlock(&node->data.spinlock);
return node;
}
static file_context_rw_node_t *add_rw_cache_node(transport_id_t id, file_context_info_t *info, file_context_table_type_t type)
{
file_context_rw_node_t *node = NULL;
file_context_tables_t *tables = get_file_context_entry(id);
if (!tables)
{
return NULL;
}
info->low = FILE_CONTEXT_CHUNK_LOWER_BOUND(info->low);
info->high = FILE_CONTEXT_CHUNK_UPPER_BOUND(info->high);
switch (type)
{
case FILE_CONTEXT_READ_TABLE:
node = add_rw_node(&tables->read_table, info);
break;
case FILE_CONTEXT_WRITE_TABLE:
node = add_rw_node(&tables->write_table, info);
break;
default:
break;
}
if (node)
{
DPRINTF("add_rw_cache_node[%llu]: %llu", id, info->msg_info.key.file_key.ptr);
}
else
{
EPRINTF("add_rw_cache_node failed[%llu]: %llu", id, info->msg_info.key.file_key.ptr);
}
put_file_context_entry(tables);
return node;
}
file_context_rw_t *add_rw_cache(transport_id_t id, file_context_info_t *info, file_context_table_type_t type)
{
file_context_rw_node_t *node = add_rw_cache_node(id, info, type);
return node ? &node->data : NULL;
}
void put_rw_cache(file_context_rw_t *node_data)
{
file_context_rw_node_t *node = container_of(node_data, file_context_rw_node_t, data);
put_common_node(&node->common_node);
}
static inline file_context_process_node_t *lookup_or_add_process_node(file_context_common_table_t *table,
const file_context_info_t *info)
{
file_context_process_node_t *process_node = NULL;
file_context_common_node_t *common_node = NULL;
common_node = lookup_common_node(table, info->unique_pid, table->hashbits);
if (common_node)
{
process_node = (file_context_process_node_t *)container_of(common_node, file_context_process_node_t, common_node);
}
if (!process_node)
{
process_node = (file_context_process_node_t *)malloc_node(sizeof(file_context_process_node_t));
if (!process_node)
{
return NULL;
}
get_common_node(&process_node->common_node);
atomic_set(&process_node->data.flags, 0);
insert_common_node(table, &process_node->common_node, info->unique_pid, table->hashbits);
}
return process_node;
}
static inline void file_modify_node_pre_free(void *common_node)
{
file_context_file_modify_node_t *file_node =
(file_context_file_modify_node_t *)container_of(common_node, file_context_file_modify_node_t, common_node);
clear_common_table(&file_node->process_table.common_table);
}
static file_context_file_modify_node_t *lookup_or_add_file_modify_node(file_context_common_table_t *table, const file_context_info_t *info)
{
file_context_file_modify_node_t *tmp_file_node = NULL;
file_context_common_node_t *common_node = NULL;
common_node = lookup_common_node(table, info->msg_info.key.file_key.ptr, table->hashbits);
if (common_node)
{
tmp_file_node = (file_context_file_modify_node_t *)container_of(common_node, file_context_file_modify_node_t, common_node);
// If the file_key mismatches, drop the reference and 'forget' that we found a common_node; a fresh node is created below
if (cmp_file_context(&tmp_file_node->key, &info->msg_info.key) != 0)
{
put_common_node(common_node);
tmp_file_node = NULL;
}
}
if (!tmp_file_node)
{
tmp_file_node = (file_context_file_modify_node_t *)malloc_node(sizeof(file_context_file_modify_node_t));
if (!tmp_file_node)
{
return NULL;
}
get_common_node(&tmp_file_node->common_node);
tmp_file_node->common_node.pre_free_func = file_modify_node_pre_free;
hash_init(tmp_file_node->process_table.hashtable);
tmp_file_node->key = info->msg_info.key;
init_file_context_common_table(&tmp_file_node->process_table.common_table, tmp_file_node->process_table.hashtable,
FILE_CONTEXT_SMALL_TABLE_SIZE_BITS, FILE_CONTEXT_SMALL_TABLE_MAX_SIZE, FILE_CONTEXT_SMALL_TABLE_LRU_CLEAN_SIZE);
insert_common_node(table, &tmp_file_node->common_node, info->msg_info.key.file_key.ptr, table->hashbits);
}
return tmp_file_node;
}
static bool add_file_modify_cache_(file_context_big_table_t *table, const file_context_info_t *info)
{
file_context_file_modify_node_t *tmp_file_node = NULL;
file_context_process_node_t *tmp_process_node = NULL;
tmp_file_node = lookup_or_add_file_modify_node(&table->common_table, info);
if (!tmp_file_node)
{
return false;
}
tmp_process_node = lookup_or_add_process_node(&tmp_file_node->process_table.common_table, info);
if (!tmp_process_node)
{
put_common_node(&tmp_file_node->common_node);
return false;
}
atomic_set(&tmp_process_node->data.flags, 1);
put_common_node(&tmp_process_node->common_node);
put_common_node(&tmp_file_node->common_node);
return true;
}
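/*
The file-modify cache lives in the last slot of the global manager
(MAX_TRANSPORT_EXTENDED_SIZE - 1), which acquire_file_modify_entry() sets up;
its write_table remembers which process has modified which file.
*/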
bool add_file_modify_cache(file_context_info_t *info)
{
bool ret = false;
file_context_tables_t *tables = get_file_context_entry(MAX_TRANSPORT_EXTENDED_SIZE - 1);
if (!tables)
{
EPRINTF("add_file_modify_cache: failed to get table");
return false;
}
ret = add_file_modify_cache_(&tables->write_table, info);
if (ret)
{
DPRINTF("add_file_modify_cache: %llu", info->msg_info.key.file_key.ptr);
}
else
{
EPRINTF("add_file_modify_cache failed: %llu", info->msg_info.key.file_key.ptr);
}
put_file_context_entry(tables);
return ret;
}
// This function should be called on close; it removes the process node (and possibly the file node) from the tables
bool check_update_file_modify_cache(file_context_info_t *info)
{
file_context_tables_t *tables;
bool modified = false;
file_context_common_node_t *common_node = NULL;
file_context_file_modify_node_t *file_node = NULL;
file_context_process_node_t *process_node = NULL;
tables = get_file_context_entry(MAX_TRANSPORT_EXTENDED_SIZE - 1);
if (!tables)
{
return false;
}
common_node = lookup_common_node(&tables->write_table.common_table, info->msg_info.key.file_key.ptr, tables->write_table.common_table.hashbits);
// keep the tables reference: tables->write_table is still used further down
if (!common_node)
{
put_file_context_entry(tables);
return false;
}
file_node = (file_context_file_modify_node_t *)container_of(common_node, file_context_file_modify_node_t, common_node);
if (cmp_file_context(&file_node->key, &info->msg_info.key) != 0)
{
put_common_node(&file_node->common_node);
put_file_context_entry(tables);
return false;
}
common_node = lookup_common_node(&file_node->process_table.common_table, info->unique_pid, file_node->process_table.common_table.hashbits);
if (!common_node)
{
put_common_node(&file_node->common_node);
put_file_context_entry(tables);
return false;
}
process_node = (file_context_process_node_t *)container_of(common_node, file_context_process_node_t, common_node);
if (atomic_read(&process_node->data.flags))
{
modified = true;
}
{
// remove the already-consumed process node (and the file node if its process table becomes empty)
file_context_common_node_t *common_nodes[2] = {NULL, NULL};
/* RCU WRITER */
spin_lock(&file_node->process_table.common_table.spinlock);
common_nodes[0] = remove_common_node_by_key_no_lock(&file_node->process_table.common_table, info->unique_pid);
if (file_node->process_table.common_table.size == 0)
{
spin_lock(&tables->write_table.common_table.spinlock);
common_nodes[1] = remove_common_node_by_key_no_lock(&tables->write_table.common_table, info->msg_info.key.file_key.ptr);
spin_unlock(&tables->write_table.common_table.spinlock);
}
spin_unlock(&file_node->process_table.common_table.spinlock);
/* RCU WRITER */
// put common_node from remove_common_node_by_key_no_lock
if (common_nodes[0])
{
put_common_node(common_nodes[0]);
}
if (common_nodes[1])
{
put_common_node(common_nodes[1]);
}
}
put_common_node(&file_node->common_node);
put_common_node(&process_node->common_node);
put_file_context_entry(tables);
return modified;
}
int acquire_file_modify_entry(void)
{
file_context_tables_t *entry;
entry = init_file_context_entry(MAX_TRANSPORT_EXTENDED_SIZE - 1);
if (!entry)
{
EPRINTF("acquire_file_modify_entry: init_file_context_entry failed");
return -ENOMEM;
}
spin_lock(&global_fs_event_cache_manager.writer_lock);
if (global_fs_event_cache_manager.tables[MAX_TRANSPORT_EXTENDED_SIZE - 1])
{
WPRINTF("acquire_file_modify_entry: %d, already exists", MAX_TRANSPORT_EXTENDED_SIZE - 1);
}
else
{
rcu_assign_pointer(global_fs_event_cache_manager.tables[MAX_TRANSPORT_EXTENDED_SIZE - 1], entry);
entry = NULL;
}
spin_unlock(&global_fs_event_cache_manager.writer_lock);
if (entry)
{
put_file_context_entry(entry);
}
IPRINTF("acquire_file_modify_entry\n");
return 0;
}
void release_file_modify_entry(void)
{
file_context_tables_t *entry = NULL;
spin_lock(&global_fs_event_cache_manager.writer_lock);
if (global_fs_event_cache_manager.tables[MAX_TRANSPORT_EXTENDED_SIZE - 1])
{
entry = global_fs_event_cache_manager.tables[MAX_TRANSPORT_EXTENDED_SIZE - 1];
rcu_assign_pointer(global_fs_event_cache_manager.tables[MAX_TRANSPORT_EXTENDED_SIZE - 1], NULL);
}
spin_unlock(&global_fs_event_cache_manager.writer_lock);
if (entry)
{
put_file_context_entry(entry);
}
IPRINTF("release_file_modify_entry\n");
}