/**
@file
@brief Message transport between kernel and userspace
@details Copyright (c) 2017-2021 Acronis International GmbH
@author Mikhail Krivtsov (mikhail.krivtsov@acronis.com)
@since $Id: $
*/
#include "transport.h"
#include "debug.h"
#include "device.h"
#include "file_contexts.h"
#include "ftrace_hooks/ftrace_events.h"
#include "ftrace_hooks/fsnotify_listener.h"
#include "lsm_common.h"
#include "memory.h"
#include "message.h"
#include "si_fp_properties.h"
#include "syscall_common.h"
#include "task_info_map.h"
#include "tracepoints.h"
#include "transport_protocol.h"
#include <linux/bitmap.h>
#include <asm/io.h>
#include <linux/fcntl.h>
#include <linux/fs.h>
#include <linux/fsnotify.h>
#include <linux/list.h>
#include <linux/jiffies.h> // msecs_to_jiffies()
#include <linux/mutex.h>
#ifndef KERNEL_MOCK
#include <linux/sched.h>
#else
#include <mock/mock_sched.h>
#endif
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/uaccess.h> // copy_from_user(), copy_to_user()
#include <linux/vmalloc.h>
#include <linux/wait.h> // wait_event*(), wake_up*()
#define TRANSPORT_MSG_SIZE_MAX (1<<10)
// This size is enough for a reasonable number of messages, although it is really a bare minimum
#define TRANSPORT_MINIMAL_SHARED_DATA_QUEUE_SIZE (128 * 1024)
#define TRANSPORT_QUEUE_CAPACITY (0x1000 / sizeof(msg_t *))
#define TRANSPORT_WAIT_REPLY_TIMEOUT_MSECS (60*1000)
#define TRANSPORT_WAIT_RUNDOWN_TIMEOUT_MSECS (5*1000)
#define TRANSPORT_PRINTF(format, args...) DPRINTF(format, ##args)
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#define DATA_QUEUE_HEADER_SIZE sizeof(shared_data_queue_t)
#define DATA_QUEUE_ENTRY_HEADER_SIZE sizeof(data_queue_entry_t)
static transport_event_t* transport_event_new(void)
{
transport_event_t* event = mem_alloc0(sizeof(transport_event_t));
if (!event)
return NULL;
atomic_set(&event->refcount, 1);
init_waitqueue_head(&event->msg_wait_queue);
return event;
}
static transport_event_t* transport_event_ref(transport_event_t* event)
{
atomic_inc(&event->refcount);
return event;
}
static void transport_event_unref(transport_event_t* event)
{
if (atomic_dec_and_test(&event->refcount))
mem_free(event);
}
transport_global_t transport_global;
static void transport_global_init(void)
{
mutex_init(&transport_global.transport_mutex);
transport_global.transport_count = 0;
transport_global.transports = (transports_t) {0};
atomic64_set(&transport_global.msg_id_sequence, 0);
transport_global.last_transport_seq_num = 0;
transport_global.transport_ids = (transport_ids_t) {0};
}
// must be called under 'transport_global.transport_mutex'
static transport_id_t transport_acquire_id(void) {
transport_id_t transport_id = 0;
int i;
for (i = 0; i < MAX_TRANSPORT_SIZE; i++) {
if (0 == READ_ONCE(transport_global.transport_ids.ids[i])) {
transport_id = transport_id_make(++transport_global.last_transport_seq_num, i);
WRITE_ONCE(transport_global.transport_ids.ids[i], transport_id);
break;
}
}
return transport_id;
}
// must be called under 'transport_global.transport_mutex'
static void transport_release_id(transport_id_t id) {
int i = transport_id_index(id);
if (id == READ_ONCE(transport_global.transport_ids.ids[i])) {
WRITE_ONCE(transport_global.transport_ids.ids[i], 0);
} else {
WPRINTF("transport id %llu not found", id);
}
}
// must be called under 'transport_global.transport_mutex'
static void transport_global_register(transport_t *transport)
{
int idx = transport_id_index(transport->transport_id);
rcu_assign_pointer(transport_global.transports.transports[idx], transport);
WRITE_ONCE(transport_global.transports.control_tgid[idx], transport->control_tgid);
}
// must be called under 'transport_global.transport_mutex'
static void transport_global_unregister(transport_t *transport)
{
int idx = transport_id_index(transport->transport_id);
rcu_assign_pointer(transport_global.transports.transports[idx], NULL);
WRITE_ONCE(transport_global.transports.control_tgid[idx], 0);
}
// must be called under 'transport_global.transport_mutex'
static void transport_global_recalculate_combined_event_mask_impl(void)
{
int i;
uint64_t combined_mask = 0;
for (i = 0; i < MAX_TRANSPORT_SIZE; i++) {
transport_t *transport;
transport_id_t transport_id = READ_ONCE(transport_global.transport_ids.ids[i]);
if (!transport_id)
continue;
// no need for RCU access to this pointer because we hold 'transport_mutex'
transport = transport_global.transports.transports[i];
combined_mask |= READ_ONCE(transport->events_mask);
}
WRITE_ONCE(transport_global.combined_events_mask, combined_mask);
}
struct event_subtype_relation_t
{
uint64_t events_mask;
uint64_t generated_subtype_mask;
};
static const struct event_subtype_relation_t k_subtypes_relations[] = {
{ MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_SYNC_FILE_PRE_OPEN) , MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_SYNC_OPEN_MODIFY)
| MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_SYNC_OPEN_READ)
| MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_SYNC_OPEN_MAY_CREATE)
| MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_SYNC_OPENDIR) },
{ MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_NOTIFY_FILE_PRE_OPEN) , MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_NOTIFY_OPEN_MODIFY)
| MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_NOTIFY_OPEN_READ)
| MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_NOTIFY_OPEN_MAY_CREATE)
| MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_NOTIFY_OPENDIR) },
{ MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_SYNC_FILE_PRE_CLOSE) , MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_SYNC_CLOSE_NON_WRITE)
| MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_SYNC_CLOSE_WRITE)
| MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_SYNC_CLOSEDIR) },
{ MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_NOTIFY_FILE_PRE_CLOSE), MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_NOTIFY_CLOSE_NON_WRITE)
| MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_NOTIFY_CLOSE_WRITE)
| MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_NOTIFY_CLOSEDIR) },
{ MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_SYNC_FILE_PRE_MMAP) , MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_SYNC_MMAP_NON_WRITE)
| MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_SYNC_MMAP_WRITE) },
{ MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_NOTIFY_FILE_PRE_MMAP) , MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_NOTIFY_MMAP_NON_WRITE)
| MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_NOTIFY_MMAP_WRITE) },
// All fs operations relate to generic subtypes like 'special'
{ MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_SYNC_FILE_PRE_OPEN)
| MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_NOTIFY_FILE_PRE_OPEN)
| MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_NOTIFY_FILE_CREATE)
| MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_SYNC_FILE_PRE_RENAME)
| MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_SYNC_FILE_RENAME)
| MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_NOTIFY_FSNOTIFY_RENAME)
| MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_SYNC_FILE_PRE_UNLINK)
| MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_NOTIFY_FSNOTIFY_UNLINK)
| MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_SYNC_FILE_PRE_CLOSE)
| MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_NOTIFY_FILE_PRE_CLOSE)
| MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_NOTIFY_FSNOTIFY_OPEN)
| MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_NOTIFY_FILE_PRE_RENAME)
| MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_NOTIFY_FILE_PRE_UNLINK)
| MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_NOTIFY_FSNOTIFY_CREATE)
| MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_SYNC_FILE_PRE_MMAP)
| MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_NOTIFY_FILE_PRE_MMAP)
| MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_SYNC_FILE_PRE_LINK)
| MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_NOTIFY_FILE_PRE_LINK)
| MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_SYNC_FILE_PRE_TRUNCATE)
| MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_NOTIFY_FILE_PRE_TRUNCATE), MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_SPECIAL) },
};
// If any transport includes a subtype, the combined mask must include it
static void transport_global_recalculate_combined_event_subtype_inclusion_mask_impl(void)
{
int i;
uint64_t combined_mask = 0;
for (i = 0; i < MAX_TRANSPORT_SIZE; i++) {
int j;
transport_t *transport;
transport_id_t transport_id = READ_ONCE(transport_global.transport_ids.ids[i]);
if (!transport_id)
continue;
// no need for RCU access to this pointer because we hold 'transport_mutex'
transport = transport_global.transports.transports[i];
for (j = 0; j < (int) ARRAY_SIZE(k_subtypes_relations); j++) {
const struct event_subtype_relation_t* relation = &k_subtypes_relations[j];
if (transport->events_mask & relation->events_mask) {
combined_mask |= transport->events_subtype_inclusion_mask & relation->generated_subtype_mask;
}
}
}
WRITE_ONCE(transport_global.combined_events_subtype_inclusion_mask, combined_mask);
}
// If all transports exclude a subtype, the combined mask excludes it
static void transport_global_recalculate_combined_event_subtype_exclusion_mask_impl(void)
{
int i;
uint64_t combined_mask = ~0ULL;
for (i = 0; i < MAX_TRANSPORT_SIZE; i++) {
int j;
uint64_t transport_not_excluded_subtypes;
transport_t *transport;
transport_id_t transport_id = READ_ONCE(transport_global.transport_ids.ids[i]);
if (!transport_id)
continue;
// no need for RCU access to this pointer because we hold 'transport_mutex'
transport = transport_global.transports.transports[i];
transport_not_excluded_subtypes = ~transport->events_subtype_exclusion_mask;
for (j = 0; j < (int) ARRAY_SIZE(k_subtypes_relations); j++) {
const struct event_subtype_relation_t* relation = &k_subtypes_relations[j];
if (transport->events_mask & relation->events_mask) {
// drop bits from the combined mask if it is known that generated events are not being excluded
combined_mask &= ~(transport_not_excluded_subtypes & relation->generated_subtype_mask);
}
}
}
WRITE_ONCE(transport_global.combined_events_subtype_exclusion_mask, combined_mask);
}
static void transport_global_recalculate_combined_all_event_masks_impl(void)
{
transport_global_recalculate_combined_event_mask_impl();
transport_global_recalculate_combined_event_subtype_inclusion_mask_impl();
transport_global_recalculate_combined_event_subtype_exclusion_mask_impl();
}
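/*
Illustrative example (the transports and masks below are assumed purely for the
walk-through, they are not part of the driver): suppose transports A and B both
listen for FP_SI_OT_SYNC_FILE_PRE_OPEN with

    A: inclusion = FP_SI_ST_SYNC_OPEN_READ | FP_SI_ST_SYNC_OPEN_MODIFY
       exclusion = FP_SI_ST_SYNC_OPENDIR
    B: inclusion = FP_SI_ST_SYNC_OPEN_MODIFY
       exclusion = FP_SI_ST_SYNC_OPEN_READ | FP_SI_ST_SYNC_OPENDIR

The combined inclusion mask is the union over listening transports, limited to
the subtypes this event can generate: OPEN_READ | OPEN_MODIFY.
The combined exclusion mask keeps only the subtypes excluded by *every*
listening transport: OPENDIR. OPEN_READ is not excluded because A does not
exclude it, so events with that subtype must still be generated.
*/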
static void transport_global_recalculate_combined_all_event_masks(void)
{
mutex_lock(&transport_global.transport_mutex);
transport_global_recalculate_combined_all_event_masks_impl();
mutex_unlock(&transport_global.transport_mutex);
}
static void drop_msgs_impl(ring_t *ring)
{
while (!ring_is_empty(ring)) {
msg_t *msg = *(msg_t **) ring_consumer_ptr(ring);
msg_unref(msg);
ring_consumer_index_move_one(ring);
}
}
/*
'msg ref/unref' for messages stored in 'sent_msgs_set' are invoked in
'msg_reply_wait_count inc/dec'.
There is no need for separate 'msg ref/unref' calls.
*/
static void drop_sent_msgs_impl(set_t *set)
{
void *item_ptr = set_begin_ptr(set);
void *end_ptr = set_end_ptr(set);
while (item_ptr < end_ptr) {
msg_t *msg = *(msg_t **) item_ptr;
msg_reply_wait_count_dec(msg);
item_ptr = set_ptr_next(set, item_ptr);
}
set->count = 0;
}
static void transport_shutdown(transport_t *transport)
{
DPRINTF("transport=%p", transport);
spin_lock(&transport->msg_spinlock);
{
WRITE_ONCE(transport->events_mask, 0);
WRITE_ONCE(transport->shutdown, true);
// Discard undelivered messages
drop_msgs_impl(&transport->msg_ring);
// Discard messages waiting for 'reply'
drop_sent_msgs_impl(&transport->sent_msgs_set);
}
spin_unlock(&transport->msg_spinlock);
// wakeup all userspace 'read' waiters
wake_up_all(&transport->event->msg_wait_queue);
}
// identify and shut down the transport that failed to reply
static void transport_shutdown_msg(transport_t *transport, msg_t *unreplied_msg)
{
bool found = false;
DPRINTF("transport=%p unreplied_msg=%p", transport, unreplied_msg);
spin_lock(&transport->msg_spinlock);
{
void *item_ptr = set_begin_ptr(&transport->sent_msgs_set);
void *end_ptr = set_end_ptr(&transport->sent_msgs_set);
while (item_ptr < end_ptr) {
if (unreplied_msg == *(msg_t **) item_ptr) {
found = true;
break;
}
item_ptr = set_ptr_next(&transport->sent_msgs_set, item_ptr);
}
}
spin_unlock(&transport->msg_spinlock);
if (found) {
WPRINTF("deativating transport on reply wait timeout");
transport_shutdown(transport);
}
}
// identify and shut down the transport that failed to reply
static void transport_global_shutdown_msg(msg_t *unreplied_msg)
{
int i;
DPRINTF("unreplied_msg=%p", unreplied_msg);
rcu_read_lock();
for (i = 0; i < MAX_TRANSPORT_SIZE; i++) {
transport_t *transport = rcu_dereference(transport_global.transports.transports[i]);
if (!transport)
continue;
transport_shutdown_msg(transport, unreplied_msg);
}
rcu_read_unlock();
}
static void transport_disable(transport_t *transport)
{
DPRINTF("transport=%p", transport);
transport_global_unregister(transport);
transport_shutdown(transport);
transport_release_id(transport->transport_id);
release_file_context_entry(transport->transport_id);
}
static void transport_free(transport_t *transport)
{
DPRINTF("transport=%p", transport);
IPRINTF("message queue items_count_max=%u capacity=%u",
ring_items_count_max(&transport->msg_ring),
ring_capacity(&transport->msg_ring));
IPRINTF("sent_msgs_set items_count_max=%u capacity=%u",
set_items_count_max(&transport->sent_msgs_set),
set_fetch_capacity(&transport->sent_msgs_set));
mem_free(ring_buffer(&transport->msg_ring));
mem_free(set_buffer(&transport->sent_msgs_set));
if (transport->queue) {
vfree(transport->queue);
}
transport_event_unref(transport->event);
mem_free(transport);
}
static bool transport_ring_init(ring_t *ring)
{
size_t buffer_size = TRANSPORT_QUEUE_CAPACITY * sizeof(msg_t *);
msg_t **msgs;
bool success;
if (!buffer_size) {
msgs = NULL;
success = true;
} else {
msgs = mem_alloc0(buffer_size);
success = (bool) msgs;
}
ring_init(ring, msgs, buffer_size, sizeof(msg_t *));
return success;
}
static bool transport_set_init(set_t *set)
{
size_t buffer_size = TRANSPORT_QUEUE_CAPACITY * sizeof(msg_t *);
msg_t **msgs;
bool success;
if (!buffer_size) {
msgs = NULL;
success = true;
} else {
msgs = mem_alloc0(buffer_size);
success = (bool) msgs;
}
set_init(set, msgs, buffer_size, sizeof(msg_t *));
return success;
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Shared with userspace Data Queue implementation
#ifndef smp_store_release
#define smp_store_release(p, v) \
do { \
barrier(); \
WRITE_ONCE(*p, v); \
} while (0)
#endif
#ifndef smp_load_acquire
#define smp_load_acquire(p) \
({ \
typeof(*p) ___p1 = READ_ONCE(*p); \
barrier(); \
___p1; \
})
#endif
#define DATA_QUEUE_ENTRY_AT(queue, v) (data_queue_entry_t*)((uint8_t *)queue->entries + v)
#ifdef ROUND_UP
#undef ROUND_UP
#endif
#define ROUND_UP(N, S) ((((N) + (S) - 1) / (S)) * (S))
static inline void data_queue_write_new_entry(shared_data_queue_t *queue, uint32_t offset, const SiEvent* event)
{
data_queue_entry_t *entry = DATA_QUEUE_ENTRY_AT(queue, offset);
memcpy(entry, event, event->Size);
}
// This function is called from the data queue 'writer' under the spin_lock because it is NOT thread-safe.
// As such, reads from 'queue->tail' can be done with 'READ_ONCE' (relaxed), while writes must use 'smp_store_release'.
// The 'reader' may alter 'queue->head' at any time, so it must be read with 'smp_load_acquire'; writing it here is NOT allowed.
static bool transport_shared_data_queue_enqueue_impl(transport_t *transport, const SiEvent* event, transport_event_t **deferred_wakeup)
{
uint32_t head, tail, new_tail;
uint32_t entry_size = event->Size;
shared_data_queue_t *queue = transport->queue;
uint32_t queue_size = transport->queue_size;
transport->bytes_written += entry_size;
// Notice that we are not doing any memory shenanigans here to load 'tail' & 'head'.
// Barriers are issued later, if and when they turn out to be necessary.
// !!! 'head' might not be synchronized with the 'reader'; that is OK and is handled at the end.
tail = READ_ONCE(queue->tail);
head = smp_load_acquire(&queue->head);
// Check for unreasonable 'tail' or 'head', must never happen.
if (queue_size < tail || queue_size < head) {
WPRINTF("Invalid tail/head detected: tail=%u, head=%u, size=%u"
, (unsigned) tail, (unsigned) head, (unsigned) queue_size);
return false;
}
// Start inserting the contents of 'event' into the shared data queue
if (tail >= head) {
// 'tail' is ahead of 'head': this is the regular scenario. Handle it
// head tail
// V V
// -----|*************|-----------------
// ^ ^
// data to be dequeued |
// free space
if ((tail + entry_size) <= queue_size) {
// There is enough space after the 'tail' of the queue, write the entry and move the tail
// head tail new_tail
// V V V
// -----|*************|+++++++|-------
// ^
// new entry
data_queue_write_new_entry(queue, tail /*off*/, event);
new_tail = tail + entry_size;
} else if (head > entry_size) {
// The first condition was not satisfied, so the data cannot be placed after 'tail'.
// Wrap around to the start of the buffer; there is enough space before the userspace 'head'.
// head tail
// V V
// |++++++|------------|*************|?? <- zapped entry w/ size>queue_size-tail, if fits
// ^ ^
// off new_tail
// Tell userspace that the current entry is too long to fit at the end of the buffer.
// If there is not enough space to even place an entry header, do nothing.
// Otherwise, deliberately zap the entry by writing a 'size' that is too big to fit.
if ((queue_size - tail) >= DATA_QUEUE_ENTRY_HEADER_SIZE) {
data_queue_entry_t *entry_to_zap = DATA_QUEUE_ENTRY_AT(queue, tail);
entry_to_zap->size = entry_size;
// do not touch 'entry_to_zap->data', it is bogus; the entry just tells the reader to wrap to the start
}
// Write data at the beginning of the queue
data_queue_write_new_entry(queue, 0 /*off*/, event);
new_tail = /*off==0 + */ entry_size;
} else {
// There is neither enough space after 'tail' nor before 'head', bail
WPRINTF("No more space is left, head=%u, tail=%u, entry_size=%u, written=%llu", head, tail, entry_size, transport->bytes_written);
return false;
}
} else {
// Catching up to the 'head' from the other side.
// tail head
// V V
// ****|--------------|***************
// The insert can still be done if 'head' is not overrun
if ((head - tail) > entry_size) {
// tail head
// V V
// ****|+++++|------|***************
// ^
// new_tail
data_queue_write_new_entry(queue, tail, event);
new_tail = tail + entry_size;
} else {
// There is not enough space without overrunning 'head', bail
WPRINTF("No more space is left, head=%u, tail=%u, entry_size=%u, written=%llu", head, tail, entry_size, transport->bytes_written);
return false;
}
}
// Expose all the content written in this thread with 'release' semantics.
// The reader must do 'smp_load_acquire' on the same variable ('tail') to see the entries written.
// !!! This logic does NOT force 'tail' as seen by the 'reader' to be equal to 'tail' in the 'writer'
new_tail = ROUND_UP(new_tail, sizeof(uint32_t));
smp_store_release(&queue->tail, new_tail);
// The new tail was published to the 'queue', but is it necessary to notify the 'reader'?
// If 'tail == head' at the beginning, userspace has finished reading all the
// content and is about to wait, or is already waiting, for the 'event'.
// In that case we must notify the 'reader' no matter what.
// Moreover, the 'reader' cannot move 'head' past 'tail', so it is guaranteed that this is
// indeed the latest 'head' published by the reader.
if (tail != head) {
// If 'tail != head', it is not as clear. If userspace happened to move 'head' up
// to 'tail' while the 'writer' was adding the new entry, the 'reader' will go to 'wait'.
// So we must refresh 'head' to make sure we really do not need to wake the 'reader' up.
// The opposite situation is also possible - the 'reader' might have delayed its write to 'head'.
// We need to make sure userspace keeps consuming events now that we wrote the new 'tail'.
// Whenever userspace detects that its current 'tail == head', it performs an 'smp_mb'
// to fetch the new 'tail' we just wrote and check whether there is anything left to consume.
smp_mb();
head = READ_ONCE(queue->head);
}
if (tail == head) {
// atomic_ops.rst: atomic_read() and atomic_set() DO NOT IMPLY BARRIERS!
atomic_set(&transport->queue_event, 1);
// The data queue was empty, wake up the 'reader' that is waiting for us.
// Use 'smp_wmb' to make sure the 'tail' we stored will be seen by the user.
// It is also fine if we did 'smp_mb' before; we still pair with the reader's 'smp_rmb'.
// 'smp_wmb' also ensures the 'atomic_set' of 'queue_event' is visible.
smp_wmb();
if (deferred_wakeup) {
*deferred_wakeup = transport_event_ref(transport->event);
} else {
wake_up_interruptible(&transport->event->msg_wait_queue);
}
TRANSPORT_PRINTF("woken up listener ht=%u nt=%u", tail, new_tail);
}
return true;
}
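/*
The matching userspace consumer is not part of this file; the sketch below only
illustrates the protocol that the barrier comments above pair with. 'queue' is
the mmap'ed shared_data_queue_t, 'queue_size' excludes the header, 'dev_fd' and
'consume()' are hypothetical, and the kernel macro names stand in for the
equivalent userspace acquire/release operations:

    uint32_t head = queue->head;                        // the reader owns 'head'
    for (;;) {
        uint32_t tail = smp_load_acquire(&queue->tail); // pairs with the writer's release
        if (head == tail) {
            // drained: ask the kernel to block us until the writer wakes us up
            write(dev_fd, &wait_queue_request, sizeof(wait_queue_request)); // AT_WAIT_SHARED_DATA_QUEUE
            continue;
        }
        data_queue_entry_t *entry = DATA_QUEUE_ENTRY_AT(queue, head);
        if (queue_size - head < DATA_QUEUE_ENTRY_HEADER_SIZE
            || head + entry->size > queue_size) {
            head = 0;                                    // 'zapped' tail entry: wrap around
            continue;
        }
        consume(entry);
        head = ROUND_UP(head + entry->size, sizeof(uint32_t));
        smp_store_release(&queue->head, head);           // publish progress to the writer
    }
*/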
static long transport_queue_events_available(transport_t *transport)
{
uint32_t tail, head;
int ev;
int shutdown;
shared_data_queue_t *queue = READ_ONCE(transport->queue);
smp_rmb();
ev = atomic_xchg(&transport->queue_event, 0);
if (ev) {
TRANSPORT_PRINTF("check ev active");
return 1;
}
// This should not be necessary, but do it just in case.
tail = READ_ONCE(queue->tail);
head = READ_ONCE(queue->head);
shutdown = READ_ONCE(transport->shutdown);
TRANSPORT_PRINTF("check s=%u h=%u t=%u", shutdown, head, tail);
return shutdown || (head != tail);
}
// This function is called whenever the userspace 'reader' decides that there are no more events to read.
// It waits on 'msg_wait_queue' for the data queue to gain new content.
// The 'writer' calls 'wake_up_interruptible' when it enqueues into a queue the reader has fully drained.
static long transport_data_queue_wait(transport_t *transport)
{
shared_data_queue_t *queue = READ_ONCE(transport->queue);
long ret;
if (!queue) {
EPRINTF("queue is NULL");
return -EINVAL;
}
if (wait_event_interruptible_exclusive(transport->event->msg_wait_queue, transport_queue_events_available(transport))) {
ret = -EINTR;
} else {
if (READ_ONCE(transport->shutdown)) {
ret = -EIO;
} else {
ret = 0;
}
}
return ret;
}
static int transport_data_queue_mmap(transport_t *transport, struct vm_area_struct *vma)
{
unsigned long sz = vma->vm_end - vma->vm_start;
void *ptr;
// Technically userspace may mmap only part of the buffer, but
// that is guaranteed to become a BUG later when the code manages the
// shared data queue, so complain immediately instead.
if (0 != vma->vm_pgoff) {
EPRINTF("mmaped offset is not zero");
return -EINVAL;
}
if (sz != transport->queue_size + DATA_QUEUE_HEADER_SIZE) {
EPRINTF("mmaped size is invalid, 0x%lx != 0x%x", sz, transport->queue_size);
return -EINVAL;
}
ptr = READ_ONCE(transport->queue);
if (!ptr) {
EPRINTF("queue is NULL");
return -EINVAL;
}
return remap_vmalloc_range(vma, transport->queue, 0);
}
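/*
Expected userspace setup sequence for the shared data queue (a sketch; the exact
request/reply image layouts live in transport_protocol.h):
 1. open() the transport device;
 2. send an AT_INIT_SHARED_DATA_QUEUE request via IOCTL_WRITE_AND_READ_MSG with a
    data_queue_params_t.size that is PAGE_SIZE aligned and larger than
    TRANSPORT_MINIMAL_SHARED_DATA_QUEUE_SIZE;
 3. mmap() exactly 'size' bytes at offset 0 - this maps the shared_data_queue_t
    header followed by 'size - DATA_QUEUE_HEADER_SIZE' bytes of entries;
 4. consume entries as sketched after transport_shared_data_queue_enqueue_impl(),
    writing an AT_WAIT_SHARED_DATA_QUEUE request whenever the queue is drained.
*/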
static long data_queue_create(const data_queue_params_t *params, shared_data_queue_t **pqueue)
{
shared_data_queue_t *queue;
uint32_t size = params->size;
// Strictly speaking 'DATA_QUEUE_HEADER_SIZE' is the bare minimum, but
// require a somewhat larger size so that at least a few messages fit
if (size <= TRANSPORT_MINIMAL_SHARED_DATA_QUEUE_SIZE) {
EPRINTF("size provided is too small");
return -EINVAL;
}
// check if size is PAGE_SIZE aligned because it will later be used in 'mmap'
if (size & (PAGE_SIZE - 1)) {
EPRINTF("size is not PAGE_SIZE aligned");
return -EINVAL;
}
queue = (shared_data_queue_t*) vmalloc_user(size);
if (!queue)
return -ENOMEM;
*pqueue = queue;
return 0;
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
static bool transport_send_msg_nowait(transport_t *transport, msg_t *msg, transport_event_t **deferred_wakeup)
{
bool need_wakeup = false;
const SiEvent *event = &msg->event;
spin_lock(&transport->msg_spinlock);
{
if (READ_ONCE(transport->shutdown)) {
spin_unlock(&transport->msg_spinlock);
return false;
}
if (FP_SI_CT_WANT_REPLY == event->CallbackType) {
unsigned item_index;
if (set_is_full(&transport->sent_msgs_set)) {
WPRINTF("'sent_msgs_set' overflow (capacity=%u)", set_fetch_capacity(&transport->sent_msgs_set));
spin_unlock(&transport->msg_spinlock);
transport_shutdown(transport);
return false;
}
item_index = set_items_count(&transport->sent_msgs_set);
/*
'msg ref/unref' for messages stored in 'sent_msgs_set' are invoked in
'msg_reply_wait_count inc/dec'.
There is no need for separate 'msg ref/unref' calls.
*/
*(msg_t **) set_item_ptr(&transport->sent_msgs_set, item_index) = msg_reply_wait_count_inc(msg);
set_items_count_set(&transport->sent_msgs_set, item_index + 1);
}
if (transport->queue) {
need_wakeup = false;
if (!transport_shared_data_queue_enqueue_impl(transport, event, deferred_wakeup)) {
WPRINTF("mmaped queue overflow");
spin_unlock(&transport->msg_spinlock);
transport_shutdown(transport);
return false;
}
} else {
need_wakeup = true;
if (ring_is_full(&transport->msg_ring)) {
WPRINTF("message queue overflow (capacity=%u)", ring_capacity(&transport->msg_ring));
spin_unlock(&transport->msg_spinlock);
transport_shutdown(transport);
return false;
}
*(msg_t **) ring_producer_ptr(&transport->msg_ring) = msg_ref(msg);
ring_producer_index_move_one(&transport->msg_ring);
}
}
spin_unlock(&transport->msg_spinlock);
if (need_wakeup) {
// wakeup userspace reader
if (deferred_wakeup)
*deferred_wakeup = transport_event_ref(transport->event);
else
wake_up_interruptible(&transport->event->msg_wait_queue);
}
return true;
}
static bool transport_send_hello_nowait(transport_t *transport)
{
msg_t *msg = hello_msg_new();
bool success;
if (!msg) {
success = false;
} else {
success = transport_send_msg_nowait(transport, msg, NULL /*deferred_wakeup*/);
msg_unref(msg);
}
return success;
}
static bool should_send(transport_t* transport, msg_t* msg)
{
transport_id_t transport_id = transport->transport_id;
bool ret = true;
int idx = transport_id_index(transport_id);
task_info_t* task_info = msg->task_info;
const SiEvent* event = &msg->event;
uint16_t operation = event->Operation;
if (transport->control_tgid == current->tgid) {
return false;
}
// Check if transport needs an event
if (!(READ_ONCE(transport->events_mask) & MSG_TYPE_TO_EVENT_MASK(operation))) {
return false;
}
if (msg->subtype_mask) {
if (!(READ_ONCE(transport->events_subtype_inclusion_mask) & msg->subtype_mask)) {
return false;
}
if (READ_ONCE(transport->events_subtype_exclusion_mask) & msg->subtype_mask) {
return false;
}
}
// Check if current transport does not need the event
if (msg->file_context_msg_info.skipped_transport_ids[idx] == transport_id) {
return false;
}
// Check the task info; the handling depends on the type of the SiEvent.
// 'task_info' might not be available; in that case do nothing.
if (!task_info) {
return true;
}
if (FP_SI_OT_NOTIFY_PROCESS_EXEC == operation)
{
uint64_t pid_version = msg->exec.pid_version;
ret = task_info_wants_exec_event(task_info, transport_id, pid_version);
}
else if (FP_SI_OT_NOTIFY_PROCESS_FORK == operation)
{
uint64_t pid_version = msg->fork.pid_version;
// FORK when task_info is provided is basically an EXEC event so update the pid_version
(void) task_info_wants_exec_event(task_info, transport_id, pid_version);
// for FORK event, always send
}
else
{
uint64_t listening_mask = ~0ULL;
if (READ_ONCE(task_info->contexts[idx].transport_id) == transport_id)
{
listening_mask = READ_ONCE(task_info->contexts[idx].data.listening_mask);
}
ret = !!(listening_mask & MSG_TYPE_TO_EVENT_MASK(operation));
}
return ret;
}
static bool send_msg_nowait(msg_t *msg)
{
bool sync = FP_SI_CT_WANT_REPLY == msg->event.CallbackType;
int i;
bool sent = false;
transport_event_t *deferred_wakeups[MAX_TRANSPORT_SIZE];
int deferred_wakeups_count = 0;
rcu_read_lock();
for (i = 0; i < MAX_TRANSPORT_SIZE; i++) {
transport_t *transport = rcu_dereference(transport_global.transports.transports[i]);
if (transport && should_send(transport, msg)) {
deferred_wakeups[deferred_wakeups_count] = NULL;
sent |= transport_send_msg_nowait(transport, msg, &deferred_wakeups[deferred_wakeups_count]);
if (deferred_wakeups[deferred_wakeups_count])
{
deferred_wakeups_count++;
}
}
}
rcu_read_unlock();
for (i = 0; i < deferred_wakeups_count; i++) {
// Wake up all deferred waiters; wake the last one synchronously so the scheduler can switch to it immediately
if (sync && (i == deferred_wakeups_count - 1)) {
wake_up_interruptible_sync(&deferred_wakeups[i]->msg_wait_queue);
} else {
wake_up_interruptible(&deferred_wakeups[i]->msg_wait_queue);
}
transport_event_unref(deferred_wakeups[i]);
}
return sent;
}
static transport_t *transport_new(void)
{
transport_t *transport = mem_alloc0(sizeof(transport_t));
if (transport) {
atomic_set(&transport->refcount, 1);
transport->transport_id = transport_acquire_id();
transport->bytes_written = 0;
if (0 == transport->transport_id)
{
EPRINTF("transport %p failed to acquire transport id", transport);
mem_free(transport);
return NULL;
}
transport->event = transport_event_new();
if (!transport->event)
{
EPRINTF("transport %p failed to allocate event", transport);
mem_free(transport);
return NULL;
}
acquire_file_context_entry(transport->transport_id);
// remember the client process doing 'open' so that its own events are auto-ignored
WRITE_ONCE(transport_global.transport_ids.ids[transport_id_index(transport->transport_id)], transport->transport_id);
transport->control_tgid = current->tgid;
spin_lock_init(&transport->msg_spinlock);
WRITE_ONCE(transport->events_mask, MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_HELLO)
| MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_PONG));
WRITE_ONCE(transport->events_subtype_inclusion_mask, ~0ULL);
WRITE_ONCE(transport->events_subtype_exclusion_mask, 0);
WRITE_ONCE(transport->shutdown, false);
transport->queue = NULL;
atomic_set(&transport->queue_event, 0);
if (transport_ring_init(&transport->msg_ring)
&& transport_set_init(&transport->sent_msgs_set)
&& transport_send_hello_nowait(transport)) {
transport_global_register(transport);
} else {
transport_disable(transport);
transport_free(transport);
transport = NULL;
}
}
DPRINTF("transport=%p", transport);
return transport;
}
int __init transport_mod_init(void)
{
int ret;
transport_global_init();
ret = device_mod_init();
if (ret) {
EPRINTF("'device_mod_init()' failure %i", ret);
}
return ret;
}
void transport_mod_down(void)
{
DPRINTF("");
device_mod_down();
DPRINTF("");
}
static msg_t *transport_lookup_msg_ref(transport_t *transport, msg_id_t reply_id) {
msg_t* msg = NULL;
DPRINTF("");
// TODO DK: Is it possible to use radix tree here instead?
spin_lock(&transport->msg_spinlock);
{
void *item_ptr = set_begin_ptr(&transport->sent_msgs_set);
void *end_ptr = set_end_ptr(&transport->sent_msgs_set);
while (item_ptr < end_ptr) {
msg_t *query = *(msg_t **) item_ptr;
if (query->id == reply_id) {
msg = query;
msg_ref(msg);
goto unlock;
}
item_ptr = set_ptr_next(&transport->sent_msgs_set, item_ptr);
}
}
unlock:
spin_unlock(&transport->msg_spinlock);
DPRINTF("ret=%p", msg);
return msg;
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
static long install_file(struct file *file, int fd, int *pfd)
{
if (IS_ERR(file)) {
put_unused_fd(fd);
return PTR_ERR(file);
}
// 'file' is consumed by 'fd_install()' so there is no need to call 'fput()'
fd_install(fd, file);
*pfd = fd;
return 0;
}
static long open_file_with_flags(const char *full_path, int uflags, int mode, int *pfd)
{
struct file *file;
int flags;
int fd;
DPRINTF("Opening file '%s', uflags '%d', mode '%d'", full_path, uflags, mode);
fd = get_unused_fd_compat();
if (fd < 0) {
return fd;
}
flags = uflags
#ifdef O_LARGEFILE
| O_LARGEFILE
#endif
#ifdef O_NOATIME
| O_NOATIME
#endif
// 'FMODE_NONOTIFY' keeps 'fanotify' from generating events for this file.
#ifdef FMODE_NONOTIFY
| FMODE_NONOTIFY
#endif
;
file = filp_open(full_path, flags, mode);
return install_file(file, fd, pfd);
}
static inline long open_file(struct path *path, int *pfd)
{
struct file *file;
int flags;
int fd;
if (!path->dentry && !path->mnt) {
return -ENOENT;
}
fd = get_unused_fd_compat();
if (fd < 0) {
return fd;
}
flags = O_RDONLY
#ifdef O_LARGEFILE
| O_LARGEFILE
#endif
#ifdef O_NOATIME
| O_NOATIME
#endif
// 'FMODE_NONOTIFY' keeps 'fanotify' from generating events for this file.
#ifdef FMODE_NONOTIFY
| FMODE_NONOTIFY
#endif
;
file = dentry_open_compat(path, flags);
if (IS_ERR(file)) {
// If the open failed, try to open via the path instead.
// Notice that this open runs in the 'client' service context,
// so this 'filp_open' has a good chance of failing because the
// 'mount namespace' of that process might be different.
// A proper solution would perhaps be to open the file inside the original
// process context, but that would require either creating the 'file'
// early or doing extra context switches into the scanned process.
// Either way seems inefficient, so it is currently avoided.
size_t size = PAGE_SIZE;
char *buf = mem_alloc(size);
const char *full_path;
if (!buf) {
return -ENOMEM;
}
full_path = d_path(path, buf, size);
if (!IS_ERR(full_path)) {
file = filp_open(full_path, flags, 0);
}
mem_free(buf);
}
return install_file(file, fd, pfd);
}
static long transport_ioctl_handle_open_file_from_msg(transport_t *transport, msg_varsized_t *reply_msg, msg_sized_t *query_msg)
{
long ret;
msg_t* msg;
request_msg_img_t *msg_img = MSG_IMG(query_msg);
open_file_from_msg_img_t *img = IMG_PAYLOAD(msg_img);
if (MSG_SIZE(query_msg) < sizeof(request_msg_img_t) + sizeof(open_file_from_msg_img_t)) {
EPRINTF("'%s' message is too short", action_type_to_string(MSG_TYPE(query_msg)));
return -EINVAL;
}
msg = transport_lookup_msg_ref(transport, MSG_ID(query_msg));
if (!msg) {
ret = -ESRCH;
} else {
int fd = -1;
struct path path;
thread_safe_path_load(img->num == 0 ? &msg->path : &msg->path2, &path);
msg_unref(msg);
ret = open_file(&path, &fd);
path_put(&path);
if (0 == ret) {
ret = open_file_return_msg_new(reply_msg, fd);
}
}
return ret;
}
static long transport_ioctl_handle_open_file_by_path(transport_t *transport, msg_varsized_t *reply_msg, msg_sized_t *query_msg)
{
long ret;
char *path;
size_t pathSize;
int fd = -1;
request_msg_img_t *msg_img = MSG_IMG(query_msg);
open_file_by_path_img_t *img = IMG_PAYLOAD(msg_img);
(void) transport;
if (MSG_SIZE(query_msg) <= sizeof(request_msg_img_t) + sizeof(open_file_by_path_img_t)) {
EPRINTF("'%s' message is too short", action_type_to_string(MSG_TYPE(query_msg)));
return -EINVAL;
}
path = img->path;
pathSize = MSG_SIZE(query_msg) - (sizeof(request_msg_img_t) + sizeof(open_file_by_path_img_t));
path[pathSize - 1] = '\0';
ret = open_file_with_flags(path, img->flags, img->mode, &fd);
if (0 == ret) {
ret = open_file_return_msg_new(reply_msg, fd);
}
return ret;
}
static long transport_ioctl_handle_get_version(msg_varsized_t *reply_msg)
{
return version_info_return_msg_new(reply_msg);
}
static long transport_ioctl_handle_data_queue_init(transport_t *transport, msg_varsized_t *reply_msg, msg_sized_t *query_msg)
{
request_msg_img_t *msg_img = MSG_IMG(query_msg);
data_queue_params_t *params = IMG_PAYLOAD(msg_img);
long err;
shared_data_queue_t *queue;
uint32_t queue_size;
if (MSG_SIZE(query_msg) < sizeof(request_msg_img_t) + sizeof(data_queue_params_t)) {
EPRINTF("'%s' message is too short", action_type_to_string(MSG_TYPE(query_msg)));
return -EINVAL;
}
err = data_queue_create(params, &queue);
if (err) {
return err;
}
queue_size = params->size - DATA_QUEUE_HEADER_SIZE;
{
spin_lock(&transport->msg_spinlock);
if (transport->queue) {
spin_unlock(&transport->msg_spinlock);
vfree(queue);
return -EEXIST;
}
transport->queue = queue;
transport->queue_size = queue_size;
spin_unlock(&transport->msg_spinlock);
}
return data_queue_offsets_return_msg_new(reply_msg, queue_size);
}
static int mnt_info_ret_msg_new(msg_varsized_t *msg, bool ok)
{
size_t msg_img_size;
request_msg_img_t *msg_img;
mnt_info_ret_img_t *mnt_info_ret_img;
msg_sized_t *smsg;
msg_img_size = sizeof(request_msg_img_t) + sizeof(mnt_info_ret_img_t);
smsg = msg_varsized_init(msg, msg_img_size);
if (!smsg)
{
return -ENOMEM;
}
MSG_TYPE(smsg) = RT_GET_MNT_ID_OFFSET;
msg_img = MSG_IMG(smsg);
mnt_info_ret_img = IMG_PAYLOAD(msg_img);
mnt_info_ret_img->ok = ok;
return 0;
}
#define MNT_ID_FIND_BUFFER_LEN (128)
static inline bool file_ok(struct file *file)
{
struct dentry *dentry;
struct inode *inode;
if (!file)
return false;
dentry = file->f_path.dentry;
if (!dentry)
return false;
inode = dentry->d_inode;
if (!inode)
return false;
if (!inode->i_sb)
return false;
return true;
}
#ifndef VFSMOUNT_HAS_MNT_ID
// return 0 on error, offset on success
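/*
Heuristic used below: userspace supplies an array of (fd, expected mnt id)
pairs. For every candidate int-sized offset within +/- MNT_ID_FIND_BUFFER_LEN
ints around each fd's 'struct vfsmount' pointer, the candidate survives only if
the value at that offset equals the expected mnt id for *all* supplied fds;
offsets that fall inside 'struct vfsmount' itself are ruled out up front because
'mnt_id' lives in the enclosing 'struct mount'. The first surviving offset is
returned, 0 meaning the offset could not be determined.
*/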
static int find_mnt_id_offset(mnt_info_img_t *img)
{
uint32_t i = 0;
uintptr_t info_ptr = (uintptr_t)img->data;
// offset[-MNT_ID_FIND_BUFFER_LEN, -1] => offset_map[0, MNT_ID_FIND_BUFFER_LEN - 1],
// offset[1, MNT_ID_FIND_BUFFER_LEN] => offset_map[MNT_ID_FIND_BUFFER_LEN + 1, MNT_ID_FIND_BUFFER_LEN * 2]
DECLARE_BITMAP(offset_map, MNT_ID_FIND_BUFFER_LEN * 2 + 1);
bitmap_fill(offset_map, MNT_ID_FIND_BUFFER_LEN * 2 + 1);
if (img->count <= 0)
{
return 0;
}
for (i = 0; i < (sizeof(struct vfsmount) / sizeof(int)); i++)
{
clear_bit(i + MNT_ID_FIND_BUFFER_LEN, offset_map);
}
for (i = 0; i < img->count; i++)
{
struct compat_fd fd;
mnt_info_t *info = (mnt_info_t *)info_ptr;
info_ptr += (sizeof(mnt_info_t));
fd = compat_fdget(info->fd);
if (file_ok(fd_file(fd)))
{
struct path path = fd_file(fd)->f_path;
int *mnt_ptr = (int *)path.mnt;
int count = 0;
while (count <= MNT_ID_FIND_BUFFER_LEN)
{
int *mnt_id = (int *)(mnt_ptr + count);
if (*mnt_id != info->mntId)
{
clear_bit(count + MNT_ID_FIND_BUFFER_LEN, offset_map);
}
mnt_id = (int *)(mnt_ptr - count);
if (*mnt_id != info->mntId)
{
clear_bit(MNT_ID_FIND_BUFFER_LEN - count, offset_map);
}
count++;
}
compat_fdput(fd);
}
}
for (i = 0; i < MNT_ID_FIND_BUFFER_LEN * 2; i++)
{
if (test_bit(i, offset_map))
{
return i - MNT_ID_FIND_BUFFER_LEN;
}
}
return 0;
}
int global_mnt_id_offset = 0;
#endif
static long transport_ioctl_handle_mnt_info(transport_t *transport, msg_varsized_t *reply_msg, msg_sized_t *query_msg)
{
int ret;
int offset = 0;
request_msg_img_t *msg_img = MSG_IMG(query_msg);
mnt_info_img_t *img = IMG_PAYLOAD(msg_img);
(void)transport;
if (MSG_SIZE(query_msg) < sizeof(request_msg_img_t) + sizeof(mnt_info_img_t)) {
EPRINTF("'%s' message is too short", action_type_to_string(MSG_TYPE(query_msg)));
return -EINVAL;
}
#ifdef VFSMOUNT_HAS_MNT_ID
(void) img; (void) offset;
ret = mnt_info_ret_msg_new(reply_msg, true);
#else
offset = find_mnt_id_offset(img);
IPRINTF("mnt offset in vfsmount: %d", offset);
if (offset != 0)
{
WRITE_ONCE(global_mnt_id_offset, offset);
ret = mnt_info_ret_msg_new(reply_msg, true);
}
else
{
ret = mnt_info_ret_msg_new(reply_msg, false);
}
#endif
return ret;
}
static long transport_ioctl_process_info(msg_varsized_t *reply_msg, msg_sized_t *query_msg)
{
long ret;
if (query_msg->img_size < (sizeof(request_msg_img_t) + sizeof(get_process_info_img_t))) {
EPRINTF("'%s' message is too short", action_type_to_string(MSG_TYPE(query_msg)));
ret = -EINVAL;
} else {
request_msg_img_t *msg_img = MSG_IMG(query_msg);
get_process_info_img_t *img = IMG_PAYLOAD(msg_img);
pid_t pid = img->pid;
ret = process_info_return_msg_new(reply_msg, pid);
}
DPRINTF("ret=%li", ret);
return ret;
}
static long transport_ioctl_process_pid_version(msg_varsized_t *reply_msg, msg_sized_t *query_msg)
{
long ret;
if (query_msg->img_size < (sizeof(request_msg_img_t) + sizeof(get_process_info_img_t))) {
EPRINTF("'%s' message is too short", action_type_to_string(MSG_TYPE(query_msg)));
ret = -EINVAL;
} else {
request_msg_img_t *msg_img = MSG_IMG(query_msg);
get_process_info_img_t *img = IMG_PAYLOAD(msg_img);
pid_t pid = img->pid;
ret = process_pid_version_return_msg_new(reply_msg, pid);
}
DPRINTF("ret=%li", ret);
return ret;
}
static long transport_ioctl_write_read_msg(transport_t *transport, msg_varsized_t *reply_msg, msg_sized_t *query_msg)
{
long ret;
action_type_t action_type = 0;
if (MSG_REPLY(query_msg)) {
EPRINTF("'reply' ioctl is not supported");
ret = -EINVAL;
goto out;
}
action_type = MSG_TYPE(query_msg);
switch (action_type) {
case AT_OPEN_FILE_FROM_MSG:
ret = transport_ioctl_handle_open_file_from_msg(transport, reply_msg, query_msg);
break;
case AT_OPEN_FILE_BY_PATH:
ret = transport_ioctl_handle_open_file_by_path(transport, reply_msg, query_msg);
break;
case AT_GET_VERSION:
ret = transport_ioctl_handle_get_version(reply_msg);
break;
case AT_INIT_SHARED_DATA_QUEUE:
ret = transport_ioctl_handle_data_queue_init(transport, reply_msg, query_msg);
break;
case AT_GET_MNT_ID_OFFSET:
ret = transport_ioctl_handle_mnt_info(transport, reply_msg, query_msg);
break;
case AT_GET_PROCESS_INFO:
ret = transport_ioctl_process_info(reply_msg, query_msg);
break;
case AT_GET_PROCESS_PID_VERSION:
ret = transport_ioctl_process_pid_version(reply_msg, query_msg);
break;
default:
EPRINTF("Unexpected '%s' message", action_type_to_string(action_type));
HEX_DUMP("query_msg: ", MSG_IMG(query_msg), MSG_SIZE(query_msg));
ret = -EINVAL;
break;
}
out:
DPRINTF("action_type=%d ret=%li", (int) action_type, ret);
return ret;
}
static long transport_ioctl_copy_from_user(ioctl_hdr_t *ioctl_hdr,
msg_varsized_t *query_msg, void __user *user_data)
{
long ret;
size_t msg_size;
msg_sized_t *msg;
request_msg_img_t *msg_img;
void *payload;
if (copy_from_user(ioctl_hdr, user_data, sizeof(ioctl_hdr_t))) {
EPRINTF("'copy_from_user()' failure");
ret = -EFAULT;
goto out;
}
msg_size = ioctl_hdr->size;
if (msg_size < sizeof(request_msg_img_t)) {
EPRINTF("message image is too small");
ret = -EINVAL;
goto out;
}
if (msg_size > TRANSPORT_MSG_SIZE_MAX) {
EPRINTF("size > TRANSPORT_MSG_SIZE_MAX");
ret = -E2BIG;
goto out;
}
msg = msg_varsized_init(query_msg, msg_size);
if (!msg) {
ret = -ENOMEM;
goto out;
}
msg_img = MSG_IMG(msg);
payload = (uint8_t *)user_data + sizeof(ioctl_hdr_t);
if (copy_from_user(msg_img, payload, msg_size)) {
msg_varsized_uninit(query_msg);
EPRINTF("'copy_from_user()' failure");
ret = -EFAULT;
goto out;
}
ret = 0;
out:
DPRINTF("ret=%li", ret);
return ret;
}
static long transport_ioctl_copy_to_user(ioctl_hdr_t *ioctl_hdr,
msg_sized_t *reply_msg, void __user *user_data)
{
long ret;
size_t msg_size = MSG_SIZE(reply_msg);
size_t capacity;
void *payload;
request_msg_img_t *msg_img;
ioctl_hdr->size = msg_size;
if (copy_to_user(user_data, ioctl_hdr, sizeof(ioctl_hdr_t))) {
EPRINTF("'copy_to_user()' failure");
ret = -EFAULT;
goto out;
}
capacity = ioctl_hdr->capacity;
if (capacity < msg_size) {
WPRINTF("capacity=%zu < msg_size=%zu", capacity, msg_size);
ret = -ENOSPC;
goto out;
}
payload = (uint8_t *)user_data + sizeof(ioctl_hdr_t);
msg_img = MSG_IMG(reply_msg);
if (copy_to_user(payload, msg_img, msg_size)) {
EPRINTF("'copy_to_user()' failure");
ret = -EFAULT;
goto out;
}
ret = 0;
out:
DPRINTF("ret=%li", ret);
return ret;
}
long transport_device_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg)
{
transport_t *transport = filp->private_data;
long ret;
if (READ_ONCE(transport->shutdown)) {
ret = -EIO;
goto out;
}
switch (cmd) {
case IOCTL_WRITE_AND_READ_MSG:
case IOCTL_READ_VERSION:
{
ioctl_hdr_t ioctl_hdr;
void *user_data = (void *)arg;
msg_varsized_t query_msg;
ret = transport_ioctl_copy_from_user(&ioctl_hdr, &query_msg, user_data);
if (!ret) {
msg_varsized_t reply_msg;
ret = transport_ioctl_write_read_msg(transport, &reply_msg, MSG_VARSIZED_GET_SIZED(&query_msg));
if (!ret) {
ret = transport_ioctl_copy_to_user(&ioctl_hdr, MSG_VARSIZED_GET_SIZED(&reply_msg), user_data);
msg_varsized_uninit(&reply_msg);
}
msg_varsized_uninit(&query_msg);
}
break;
}
default:
EPRINTF("Unexpected IOCTL cmd=%u", cmd);
ret = -ENOIOCTLCMD;
}
out:
if (-EINVAL == ret) {
EPRINTF("ioctl failed with EINVAL, dropping the transport");
transport_shutdown(transport);
}
DPRINTF("ret=%li", ret);
return ret;
}
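/*
Userspace view of IOCTL_WRITE_AND_READ_MSG (a sketch; 'ioctl_hdr_t' and the
request/reply images are defined in transport_protocol.h, the buffer layout
follows transport_ioctl_copy_from_user()/transport_ioctl_copy_to_user()):

    struct {
        ioctl_hdr_t hdr;       // hdr.size = request image size,
                               // hdr.capacity = room available for the reply image
        uint8_t payload[N];    // request_msg_img_t followed by its payload
    } buf;

    ioctl(dev_fd, IOCTL_WRITE_AND_READ_MSG, &buf);
    // On success the kernel has rewritten 'hdr' (hdr.size = reply image size)
    // and 'payload' with the reply image; -ENOSPC is returned when
    // 'hdr.capacity' is smaller than the reply.
*/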
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ssize_t transport_device_read(struct file *filp, char __user *user_data,
size_t size, loff_t *offset)
{
msg_t *msg;
transport_t *transport = filp->private_data;
size_t img_size;
ssize_t ret;
(void) offset;
if (filp->f_flags & O_NONBLOCK) {
EPRINTF("'non-blocking' mode is not supported yet");
ret = -EINVAL;
transport_shutdown(transport);
goto out;
}
if (!size) {
EPRINTF("'empty read' is not supported");
ret = -EINVAL;
transport_shutdown(transport);
goto out;
}
retry_wait:
// We may start with 'wait*()' right away because it begins
// with the 'condition' check itself.
if (wait_event_interruptible_exclusive(transport->event->msg_wait_queue,
READ_ONCE(transport->shutdown)
|| !ring_is_empty(&transport->msg_ring))) {
ret = -EINTR;
goto out;
}
// Lock the state and check if processing is actually possible.
spin_lock(&transport->msg_spinlock);
{
if (READ_ONCE(transport->shutdown)) {
ret = -EIO;
spin_unlock(&transport->msg_spinlock);
goto out;
}
if (ring_is_empty(&transport->msg_ring)) {
WPRINTF("wakeup without messages");
spin_unlock(&transport->msg_spinlock);
goto retry_wait;
}
msg = *(msg_t **) ring_consumer_ptr(&transport->msg_ring);
img_size = msg->event.Size;
DPRINTF("size=%zu img_size=%zu", size, img_size);
if (size < img_size) {
ret = -ENOSPC;
spin_unlock(&transport->msg_spinlock);
goto out;
}
ring_consumer_index_move_one(&transport->msg_ring);
}
spin_unlock(&transport->msg_spinlock);
// 'copy_to_user' MAY sleep (for example in page fault handler)
if (copy_to_user(user_data, &msg->event, img_size)) {
WPRINTF("'copy_to_user()' failure");
ret = -EFAULT;
transport_shutdown(transport);
} else {
ret = img_size;
}
msg_unref(msg);
out:
DPRINTF("ret=%zi", ret);
return ret;
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Forward declaration; the definition is at the end of the file
static long wait_msg_killable_timeout(msg_t* msg, unsigned long timeout_jiffies);
static void msg_wait_reply(msg_t *msg)
{
long ret;
// We may start with 'wait*()' right away because it begins
// with the 'condition' check itself.
DPRINTF("waiting for userspace reply...");
ret = wait_msg_killable_timeout(msg, msecs_to_jiffies(TRANSPORT_WAIT_REPLY_TIMEOUT_MSECS));
if (!ret) {
msg_type_t operation = (msg_type_t) msg->event.Operation;
// A timeout here means an unexpected issue with userspace.
FPRINTF("timeout waiting for userspace reply (msg_type=%d/%s)", operation, msg_type_to_string(operation));
HEX_DUMP("msg: ", &msg->event, msg->event.Size);
dump_stack();
// identify and shut down the transport that failed to reply
transport_global_shutdown_msg(msg);
} else if (ret < 0) {
// The calling process has been interrupted because SIGKILL was received.
// In practice this means 'block'.
DPRINTF("message was interrupted...");
msg->interrupted = true;
} else {
// Userspace reply has been received (msg->reply_msg) or
// waiting has been explicitly aborted (msg->aborted) for
// example on userspace disconnect.
DPRINTF("wait finished (msg->block=%i, wc=%d)", msg->block, atomic_read(&msg->reply_wait_count));
}
}
void send_msg_async(msg_t *msg)
{
DPRINTF("msg=%p", msg);
send_msg_nowait(msg);
DPRINTF("");
}
void send_msg_async_unref_unchecked(msg_t *msg)
{
send_msg_async(msg);
msg_unref(msg);
}
static void msg_mark_sync(msg_t *msg)
{
msg->event.CallbackType = FP_SI_CT_WANT_REPLY;
}
static bool send_msg_sync_nowait(msg_t *msg)
{
bool sent;
DPRINTF("msg=%p", msg);
msg_mark_sync(msg);
sent = send_msg_nowait(msg);
DPRINTF("msg=%p sent=%i", msg, sent);
return sent;
}
void send_msg_sync(msg_t *msg)
{
DPRINTF("msg=%p", msg);
if (send_msg_sync_nowait(msg)) {
msg_wait_reply(msg);
}
DPRINTF("");
}
void send_msg_sync_unref_unchecked(msg_t *msg)
{
send_msg_sync(msg);
thread_safe_path_clear(&msg->path);
// TODO: Why is 'path2' not cleared?
msg_unref(msg);
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
static int transport_handle_ping_msg(transport_t *transport, msg_sized_t *ping)
{
int ret;
msg_t *msg;
bool sync;
uint64_t event_uid;
transport_event_t *event = NULL;
if (ping->img_size < (sizeof(request_msg_img_t) + sizeof(ping_img_t))) {
DPRINTF("'ping' message is too short. ignoring it.");
ret = -EINVAL;
goto out;
}
event_uid = transport_global_sequence_next();
msg = pong_msg_new(ping, event_uid);
if (!msg) {
ret = -ENOMEM;
goto out;
}
// reflect ping's 'reply' policy
sync = !!MSG_ID(ping);
if (sync) {
msg->event.CallbackType = FP_SI_CT_WANT_REPLY;
}
transport_send_msg_nowait(transport, msg, &event);
if (event) {
wake_up_interruptible_sync(&event->msg_wait_queue);
transport_event_unref(event);
}
msg_wait_reply(msg);
ret = 0;
msg_unref(msg);
out:
return ret;
}
static int transport_handle_set_listening_mask_process(transport_t *transport, msg_sized_t *msg)
{
request_msg_img_t *msg_img;
process_set_listening_mask_img_t *img;
pid_t pid;
uint64_t events_mask;
int ret;
task_info_t *info;
uint64_t unique_pid;
uint64_t pid_version;
if (msg->img_size < (sizeof(request_msg_img_t) + sizeof(process_set_listening_mask_img_t))) {
DPRINTF("'pid' message is too short. ignoring it.");
ret = -EINVAL;
goto out;
}
msg_img = MSG_IMG(msg);
img = IMG_PAYLOAD(msg_img);
pid = img->pid;
events_mask = img->events_mask;
unique_pid = img->unique_pid;
pid_version = img->pid_version;
// If the user gave us a 'pid_version', do not attempt an implicit 'get'; just look the task up. Otherwise fetch the current info from the map.
if (0 == pid_version)
info = task_info_map_get_by_pid(pid, unique_pid);
else
info = task_info_lookup(pid, unique_pid);
if (info) {
ret = task_info_set_listening_mask(info, transport->transport_id, events_mask, pid_version);
task_info_put(info);
} else {
ret = -ESRCH;
}
out:
DPRINTF("ret=%i", ret);
return ret;
}
// This msg is received while userspace is processing an event msg
static int transport_handle_file_context_add_msg(transport_t *transport, msg_sized_t *add_msg)
{
request_msg_img_t *add_msg_img;
msg_t *msg;
int ret = 0;
if (add_msg->img_size < (sizeof(request_msg_img_t)))
{
DPRINTF("message is too short. ignoring it.");
ret = -EINVAL;
goto out;
}
add_msg_img = MSG_IMG(add_msg);
msg = transport_lookup_msg_ref(transport, add_msg_img->id);
if (!msg)
{
ret = -ESRCH;
}
else
{
// At this point the event msg that was sent synchronously still exists,
// so we can use its information here.
if (msg->file_context_msg_info.key.file_key.ptr != 0)
{
int flags = 0;
file_context_info_t info = {0};
uint16_t operation = msg->event.Operation;
if (FP_SI_OT_SYNC_FILE_PRE_OPEN == operation || FP_SI_OT_SYNC_FILE_PRE_WRITE == operation)
{
info.pid_version = msg->open.pid_version;
flags = msg->open.flags;
info.low = msg->write.low;
info.high = msg->write.high;
}
else
{
EPRINTF("%s: unsupported msg type", __func__);
msg_unref(msg);
return -EFAULT;
}
if ((flags & O_ACCMODE) <= O_RDWR)
{
flags += 1;
}
info.msg_info.key = msg->file_context_msg_info.key;
if (FP_SI_OT_SYNC_FILE_PRE_OPEN == operation)
{
file_context_open_file_t *file_node = NULL;
file_context_open_process_t *process_node = NULL;
add_open_cache(transport->transport_id, &info, &file_node, &process_node);
if (process_node)
{
atomic_or_compat(flags, &process_node->flags);
}
put_open_cache(file_node, process_node);
}
else if (FP_SI_OT_SYNC_FILE_PRE_WRITE == operation)
{
file_context_rw_t *node = NULL;
node = add_rw_cache(transport->transport_id, &info, FILE_CONTEXT_WRITE_TABLE);
if (node)
{
put_rw_cache(node);
}
}
}
msg_unref(msg);
}
out:
DPRINTF("ret=%i", ret);
return ret;
}
static int transport_handle_set_listening_mask_global(transport_t *transport, msg_sized_t *msg)
{
request_msg_img_t *msg_img;
events_mask_img_t *img;
uint64_t mask;
int ret;
if (msg->img_size < (sizeof(request_msg_img_t) + sizeof(events_mask_img_t))) {
DPRINTF("'events' message is too short. ignoring it.");
ret = -EINVAL;
goto out;
}
msg_img = MSG_IMG(msg);
img = IMG_PAYLOAD(msg_img);
mask = img->events_mask;
spin_lock(&transport->msg_spinlock);
{
if (READ_ONCE(transport->shutdown)) {
// Do not allow changing the mask after shutdown;
// the transport will not be able to receive any events anyway.
ret = -EFAULT;
} else {
WRITE_ONCE(transport->events_mask, mask);
ret = 0;
}
}
spin_unlock(&transport->msg_spinlock);
transport_global_recalculate_combined_all_event_masks();
out:
DPRINTF("ret=%i", ret);
return ret;
}
static int transport_handle_set_listening_subtype_mask(transport_t *transport, uint64_t* target, msg_sized_t *msg)
{
request_msg_img_t *msg_img;
events_mask_img_t *img;
uint64_t mask;
int ret;
if (msg->img_size < (sizeof(request_msg_img_t) + sizeof(events_mask_img_t))) {
DPRINTF("'events' message is too short. ignoring it.");
ret = -EINVAL;
goto out;
}
msg_img = MSG_IMG(msg);
img = IMG_PAYLOAD(msg_img);
mask = img->events_mask;
spin_lock(&transport->msg_spinlock);
{
if (READ_ONCE(transport->shutdown)) {
// Do not allow changing the mask after shutdown;
// the transport will not be able to receive any events anyway.
ret = -EFAULT;
} else {
WRITE_ONCE(*target, mask);
ret = 0;
}
}
spin_unlock(&transport->msg_spinlock);
transport_global_recalculate_combined_all_event_masks();
out:
DPRINTF("ret=%i", ret);
return ret;
}
// FIXME: do something with 'reply'. For example merge several replies
// into one; link replies into a list; extract 'responses' and merge them.
static void handle_reply(msg_t *query_msg, msg_sized_t *reply_msg)
{
// handle 'long' 'reply'
size_t headers_size = sizeof(request_msg_img_t) + sizeof(reply_img_t);
// Note: for compatibility with the legacy short 'reply_img_t', the default 'reply_type' is RT_ALLOW
if (MSG_SIZE(reply_msg) >= headers_size) {
request_msg_img_t *reply_msg_img = MSG_IMG(reply_msg);
reply_img_t *reply_img = IMG_PAYLOAD(reply_msg_img);
reply_type_t reply_type = reply_img->type;
DPRINTF("MSG_SIZE(reply_msg)=%zu - headers_size=%zu = %zu reply_type=%u",
MSG_SIZE(reply_msg), headers_size,
MSG_SIZE(reply_msg) - headers_size, reply_type);
if (RT_BLOCK == reply_type) {
query_msg->block = true;
}
}
}
static int transport_handle_reply(transport_t *transport, msg_sized_t *reply)
{
msg_id_t reply_id = MSG_ID(reply);
msg_type_t reply_type = MSG_TYPE(reply);
msg_t* msg = NULL;
DPRINTF("%lu %d", reply_id, reply_type);
// find 'query' matching this 'reply'
spin_lock(&transport->msg_spinlock);
{
void *item_ptr = set_begin_ptr(&transport->sent_msgs_set);
void *end_ptr = set_end_ptr(&transport->sent_msgs_set);
while (item_ptr < end_ptr) {
msg_t *query = *(msg_t **) item_ptr;
if (query->id == reply_id) {
// remove 'query' from 'set'
*(msg_t **) item_ptr = *(msg_t **) set_item_ptr(
&transport->sent_msgs_set,
set_items_count_dec(&transport->sent_msgs_set));
msg = query;
goto unlock;
}
item_ptr = set_ptr_next(&transport->sent_msgs_set, item_ptr);
}
WPRINTF("Unexpected 'reply' with type=%i id=%llX", reply_type, reply_id);
}
unlock:
spin_unlock(&transport->msg_spinlock);
if (msg) {
handle_reply(msg, reply);
msg_reply_wait_count_dec(msg);
}
return msg ? 0 : -ESRCH;
}
static int transport_handle_msg(transport_t *transport, msg_sized_t *msg)
{
int ret;
if (msg->img_size < sizeof(request_msg_img_t)) {
DPRINTF("message image is too small");
ret = -EINVAL;
goto out;
}
if (MSG_REPLY(msg)) {
ret = transport_handle_reply(transport, msg);
} else { // !reply
action_type_t type = MSG_TYPE(msg);
DPRINTF("type=%i", type);
switch (type) {
case AT_PING:
ret = transport_handle_ping_msg(transport, msg);
break;
case AT_WAIT_SHARED_DATA_QUEUE:
ret = transport_data_queue_wait(transport);
break;
case AT_FILE_CONTEXT_ADD:
ret = transport_handle_file_context_add_msg(transport, msg);
break;
case AT_SET_LISTENING_MASK_GLOBAL:
ret = transport_handle_set_listening_mask_global(transport, msg);
break;
case AT_SET_LISTENING_MASK_PROCESS:
ret = transport_handle_set_listening_mask_process(transport, msg);
break;
case AT_SET_LISTENING_SUBTYPE_INCLUSION_MASK:
ret = transport_handle_set_listening_subtype_mask(transport, &transport->events_subtype_inclusion_mask, msg);
break;
case AT_SET_LISTENING_SUBTYPE_EXCLUSION_MASK:
ret = transport_handle_set_listening_subtype_mask(transport, &transport->events_subtype_exclusion_mask, msg);
break;
default:
WPRINTF("Unexpected message type=%i/%s", type, action_type_to_string(type));
ret = -EINVAL;
}
}
out:
DPRINTF("ret=%i", ret);
return ret;
}
ssize_t transport_device_write(struct file *filp, const char __user *user_data,
size_t size, loff_t *offset)
{
transport_t *transport = filp->private_data;
msg_varsized_t msg;
msg_sized_t* smsg;
request_msg_img_t *msg_img;
ssize_t ret;
(void) offset;
if (READ_ONCE(transport->shutdown)) {
ret = -EIO;
goto out;
}
if (filp->f_flags & O_NONBLOCK) {
EPRINTF("'non-blocking' mode is not supported yet");
ret = -EINVAL;
transport_shutdown(transport);
goto out;
}
if (!size) {
WPRINTF("'zero write' is not supported");
ret = -EINVAL;
transport_shutdown(transport);
goto out;
}
if (size > TRANSPORT_MSG_SIZE_MAX) {
WPRINTF("size > TRANSPORT_MSG_SIZE_MAX");
ret = -E2BIG;
goto out;
}
smsg = msg_varsized_init(&msg, size);
if (!smsg) {
ret = -ENOMEM;
goto out;
}
msg_img = MSG_IMG(smsg);
if (copy_from_user(msg_img, user_data, size)) {
EPRINTF("'copy_from_user()' failure");
ret = -EFAULT;
transport_shutdown(transport);
goto free_msg;
}
ret = transport_handle_msg(transport, smsg);
if (ret) {
// make sure error code is negative
if (ret > 0) {
EPRINTF("error code must be negative");
ret = -ret;
}
goto free_msg;
}
ret = size;
free_msg:
msg_varsized_uninit(&msg);
out:
DPRINTF("ret=%zi", ret);
return ret;
}
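/*
Userspace event loop for the fallback (non-mmap) path, sketched from the code
above: a blocking read() on the device returns exactly one SiEvent image per
call (or -EIO once the transport is shut down). Events sent with
FP_SI_CT_WANT_REPLY are answered by write()-ing a reply image carrying the same
message id; transport_handle_reply() matches it against 'sent_msgs_set' and
releases the kernel thread blocked in msg_wait_reply(). The exact reply layout
(reply_img_t, RT_ALLOW/RT_BLOCK) is defined in transport_protocol.h; if no reply
arrives within TRANSPORT_WAIT_REPLY_TIMEOUT_MSECS the transport is shut down by
transport_global_shutdown_msg().
*/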
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
/*
Warning: 'transport_open()' and 'transport_release()' may be
simultaneously invoked by several threads or processes.
Note: We can match different 'transport' instances using 'device'
'major'/'minor' from 'inode->i_rdev'. A pointer to the selected 'transport'
can be stored in 'filp->private_data' for later use in '*_read()',
'*_write()', etc.
Note: We may create 'transport' on 'first open' and destroy it on
'last close'.
*/
/*
There is a possibility of 'deadlock' between our 'kernel' and
'userspace' code while processing events generated by our own userspace
process, until that process is registered in the 'ignore' list.
*/
int transport_device_open(struct inode *inode, struct file *filp)
{
bool ok;
transport_t *transport;
int ret;
bool safe_mode;
#ifdef FMODE_NONOTIFY
// We are using 'fsnotify' ourselves so avoid raising 'fsnotify' events
filp->f_mode |= FMODE_NONOTIFY;
#endif
DPRINTF("inode->i_rdev: major=%u minor=%u", imajor(inode), iminor(inode));
DPRINTF("filp->f_flags=%X", filp->f_flags);
if (filp->f_flags & O_NONBLOCK) {
EPRINTF("'non-blocking' mode is not supported yet");
ret = -EINVAL;
goto out;
}
safe_mode = filp->f_flags & O_NOCTTY;
#ifndef HOOK_LIST_USE_HLIST
if (safe_mode) {
EPRINTF("safe mode requires LSM hook list");
ret = -EINVAL;
goto out;
}
#endif
#ifndef CONFIG_SECURITY_PATH
if (safe_mode) {
EPRINTF("safe mode requires LSM security path");
ret = -EINVAL;
goto out;
}
#endif
#ifndef FMODE_NONOTIFY
if (safe_mode) {
EPRINTF("safe mode requires fanotify FMODE_NONOTIFY flag");
ret = -EINVAL;
goto out;
}
#endif
mutex_lock(&transport_global.transport_mutex);
{
DPRINTF("transport_count=%u", transport_global.transport_count);
transport = transport_new();
if (!transport) {
WPRINTF("'%s()' failure", "transport_new");
ret = -ENOMEM;
goto unlock_open_close_mutex;
}
filp->private_data = transport;
if (!transport_global.transport_count) {
// FIXME: 'attach' may fail
IPRINTF("attaching interceptors");
ret = acquire_file_modify_entry();
if (ret != 0)
{
goto unlock_open_close_mutex;
}
mod_rundown_protection_set_ready();
fsnotify_events_listener_init();
register_ftrace_post_events();
lsm_hooks_init();
ret = syscall_hooks_attach(safe_mode);
if (ret) {
EPRINTF("'%s()' failure %i", "syscall_hooks_attach", ret);
lsm_hooks_exit();
unregister_ftrace_post_events();
mod_rundown_protection_set_rundown_active();
ok = mod_rundown_protection_wait_for_rundown_timeout(msecs_to_jiffies(TRANSPORT_WAIT_RUNDOWN_TIMEOUT_MSECS));
if (!ok) {
WPRINTF("Failed to wait for module rundown");
}
fsnotify_events_listener_deinit();
transport_disable(transport);
transport_free(transport);
filp->private_data = NULL;
release_file_modify_entry();
goto unlock_open_close_mutex;
}
ret = tracepoints_attach();
if (ret) {
EPRINTF("'%s()' failure %i", "tracepoints_attach", ret);
syscall_hooks_detach();
lsm_hooks_exit();
unregister_ftrace_post_events();
mod_rundown_protection_set_rundown_active();
ok = mod_rundown_protection_wait_for_rundown_timeout(msecs_to_jiffies(TRANSPORT_WAIT_RUNDOWN_TIMEOUT_MSECS));
if (!ok) {
WPRINTF("Failed to wait for module rundown");
}
fsnotify_events_listener_deinit();
transport_disable(transport);
transport_free(transport);
filp->private_data = NULL;
release_file_modify_entry();
goto unlock_open_close_mutex;
}
IPRINTF("interceptors attached");
}
++transport_global.transport_count;
ret = 0;
}
unlock_open_close_mutex:
mutex_unlock(&transport_global.transport_mutex);
out:
DPRINTF("ret=%i", ret);
if (ret)
return ret;
#ifdef FMODE_STREAM
return stream_open(inode, filp);
#else
#ifdef FMODE_ATOMIC_POS
filp->f_mode &= ~(FMODE_ATOMIC_POS);
#endif
return nonseekable_open(inode, filp);
#endif
}
// 'release()' means 'close()'
int transport_device_release(struct inode *inode, struct file *filp)
{
bool ok;
transport_t *transport = filp->private_data;
(void) inode;
mutex_lock(&transport_global.transport_mutex);
{
transport_disable(transport);
transport_global_recalculate_combined_all_event_masks_impl();
DPRINTF("transport_count=%u", transport_global.transport_count);
if (!--transport_global.transport_count) {
IPRINTF("detaching interceptors");
tracepoints_detach();
// FIXME: 'syscall_hooks_detach()' may fail
syscall_hooks_detach();
lsm_hooks_exit();
unregister_ftrace_post_events();
mod_rundown_protection_set_rundown_active();
ok = mod_rundown_protection_wait_for_rundown_timeout(msecs_to_jiffies(TRANSPORT_WAIT_RUNDOWN_TIMEOUT_MSECS));
if (!ok) {
WPRINTF("Failed to wait for module rundown");
}
// It is absolutely crucial to call this after rundown protection!!!
fsnotify_events_listener_deinit();
task_info_maps_clear();
release_file_modify_entry();
IPRINTF("interceptors detached");
}
}
mutex_unlock(&transport_global.transport_mutex);
synchronize_rcu();
transport_free(transport);
return 0;
}
int transport_device_mmap(struct file *filp, struct vm_area_struct *vma)
{
int ret;
transport_t *transport = filp->private_data;
if (READ_ONCE(transport->shutdown)) {
ret = -EIO;
goto out;
}
ret = transport_data_queue_mmap(transport, vma);
out:
return ret;
}
static long wait_msg_killable_timeout(msg_t* msg, unsigned long timeout_jiffies)
{
#ifndef HAVE_WAIT_EVENT_KILLABLE_TIMEOUT
// 'wait_event_interruptible_timeout' has to be a macro, and so are
// 'TASK_KILLABLE' and 'TASK_INTERRUPTIBLE'.
// We need the functionality of 'wait_event_interruptible_timeout'
// but with 'TASK_INTERRUPTIBLE' replaced by 'TASK_KILLABLE', which
// is achieved by temporarily redefining 'TASK_INTERRUPTIBLE'.
// If the trick cannot be applied, fall back to the regular 'wait_event_timeout'.
#if defined(TASK_KILLABLE) && defined(TASK_INTERRUPTIBLE) && defined(wait_event_interruptible_timeout) && !defined(signal_pending)
#undef TASK_INTERRUPTIBLE
#define TASK_INTERRUPTIBLE TASK_KILLABLE
#define signal_pending fatal_signal_pending
return wait_event_interruptible_timeout(msg->wait_queue,
!atomic_read(&msg->reply_wait_count),
timeout_jiffies);
#undef TASK_INTERRUPTIBLE
#undef signal_pending
#else
// Something unusual is going on; fall back to the 'TASK_UNINTERRUPTIBLE' variant.
// It should not cause any issues as long as the APL
// daemon keeps responding to events.
return wait_event_timeout(msg->wait_queue,
!atomic_read(&msg->reply_wait_count),
timeout_jiffies);
#endif
#else
// Just use the well defined macros available.
return wait_event_killable_timeout(msg->wait_queue,
!atomic_read(&msg->reply_wait_count),
timeout_jiffies);
#endif
}