Skip to content
Snippets Groups Projects
Unverified Commit 98009312 authored by Michal 'vorner' Vaner's avatar Michal 'vorner' Vaner
Browse files

Merge branch 'fwupdate'

parents f4b34091 b59a4f81
Branches
No related merge requests found
......@@ -112,10 +112,16 @@ config_check_callback:: When ucollect is loading new configuration, it
config_finish_callback:: According to parameter, either start using
the new configuration previously scanned by `config_check_callback`,
or forget it existed.
child_died_callback:: A callback that's called whenever a child
of ucollect terminates. It is called even for children not created
by the plugin. The child's pid and its exit status from `wait()` are
passed to the callback.
Furthermore, a plugin may declare its API version, by providing a
function `api_version`, returning an unsigned number. If none is
provided, the API version is considered 0.
provided, the API version is considered 0. The core defines the
highest API version declared at the time of compilation in
`UCOLLECT_PLUGIN_API_VERSION`.
Plugin libraries
----------------
......
......@@ -35,6 +35,7 @@
#include <string.h> // Why is memcpy in string?
#include <errno.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <inttypes.h>
......@@ -86,6 +87,11 @@ static const int signals[] = {
static void chld_handler(int unused) {
	/*
	 * Intentionally a no-op. Dead children are polled for every time the
	 * loop turns, so all that is needed here is a handler that makes the
	 * signal get delivered (which interrupts epoll_pwait) so the zombie
	 * can be collected afterwards.
	 */
	(void) unused; // The signal number is of no interest.
}
static void signal_initialize(void) {
......@@ -99,7 +105,7 @@ static void signal_initialize(void) {
die("Sigaction failed for signal %d: %s\n", signals[i], strerror(errno));
struct sigaction chld_action = {
.sa_handler = chld_handler,
.sa_flags = SA_NOCLDSTOP | SA_NOCLDWAIT | SA_NODEFER
.sa_flags = SA_NOCLDSTOP
};
if (sigaction(SIGCHLD, &chld_action, NULL) == -1)
die("Can't set action for SIGCHLD: %s\n", strerror(errno));
......@@ -303,6 +309,7 @@ GEN_CALL_WRAPPER_PARAM(packet, const struct packet_info *)
GEN_CALL_WRAPPER_PARAM_2(uplink_data, const uint8_t *, size_t)
GEN_CALL_WRAPPER_PARAM_2(fd, int, void *)
GEN_CALL_WRAPPER_PARAM(config_finish, bool)
GEN_CALL_WRAPPER_PARAM_2(child_died, int, pid_t)
static char *gdb_command;
static volatile sig_atomic_t in_signal = 0;
......@@ -611,7 +618,9 @@ static int blocked_signals[] = {
SIGTERM,
// Reconfiguration
SIGHUP,
SIGUSR1
SIGUSR1,
// Children died
SIGCHLD
};
// Not thread safe, not even reentrant :-(
......@@ -783,6 +792,33 @@ void loop_run(struct loop *loop) {
}
goto REINIT;
}
/*
* Handle dead children. Or, one child. If it is there, retry the loop,
* as the handler may have done something to the timers or file descriptors.
*
* Note that the epoll_pwait would either terminate before the child died, in
* which case we get it here anyway, or it would be interrupted by the SIGCHLD.
* And, even if it wasn't, the child would be picked up eventually anyway.
*/
int child_state;
pid_t child = waitpid(-1, &child_state, WNOHANG);
if (child > 0) {
ulog(LLOG_DEBUG, "Child %d terminated, status %d\n", (int)child, child_state);
LFOR(plugin, plugin, &loop->plugins)
if (plugin->api_version >= 2)
plugin_child_died(plugin, child_state, child);
continue; // The child handler may have manipulated the things here. Get new set of events.
} else if (child < 0) {
switch (errno) {
case ECHILD:
break; // That's OK, no children.
case EINTR: // This shouldn't happen with WNOHANG, but try again anyway.
ulog(LLOG_WARN, "Wait interrupted\n");
break;
default:
die("Error at wait: %s\n", strerror(errno));
}
} // 0 -> there are some children, but they are still alive
// Handle timeouts.
bool timeouts_called = false;
while (loop->timeout_count && loop->timeouts[0].when <= loop->now) {
......
......@@ -23,6 +23,7 @@
#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>
#include <unistd.h>
#include "pluglib.h"
......@@ -54,8 +55,11 @@ struct plugin {
/* ----- The below things are available only from API version 1 and above ----- */
// Functions imported from plugin libraries
struct pluglib_import **imports;
/* ----- The below things are available only from API version 2 and above ----- */
// Broadcast when a child of ucollect dies. It may belong to another plugin, for example. The state is the one from the wait() function.
void (*child_died_callback)(struct context *context, int state, pid_t child);
};
#define UCOLLECT_PLUGIN_API_VERSION 1
#define UCOLLECT_PLUGIN_API_VERSION 2
#endif
......@@ -68,17 +68,6 @@ static void connected(struct context *context) {
uplink_plugin_send_message(context, "C", 1);
}
static void initialize(struct context *context) {
struct user_data *u = context->user_data = mem_pool_alloc(context->permanent_pool, sizeof *context->user_data);
*u = (struct user_data) {
.conf_pool = loop_pool_create(context->loop, context, "FWUp set pool 1"),
.standby_pool = loop_pool_create(context->loop, context, "FWUp set pool 2"),
.queue = queue_alloc(context)
};
// Ask for config, if already connected (unlikely, but then, the message will get blackholed).
connected(context);
}
struct config {
uint32_t version;
uint32_t set_count;
......@@ -438,6 +427,22 @@ static void communicate(struct context *context, const uint8_t *data, size_t len
}
}
/*
 * Callback invoked when some child of ucollect terminates. Hands the
 * pid and wait() state over to the ipset queue, which decides whether the
 * child was its own.
 */
static void child_died(struct context *context, int state, pid_t pid) {
	struct queue *ipset_queue = context->user_data->queue;
	sanity(ipset_queue, "Missing the ipset queue\n");
	queue_child_died(context, state, pid, ipset_queue);
}
static void initialize(struct context *context) {
struct user_data *u = context->user_data = mem_pool_alloc(context->permanent_pool, sizeof *context->user_data);
*u = (struct user_data) {
.conf_pool = loop_pool_create(context->loop, context, "FWUp set pool 1"),
.standby_pool = loop_pool_create(context->loop, context, "FWUp set pool 2"),
.queue = queue_alloc(context, sets_reload)
};
// Ask for config, if already connected (unlikely, but then, the message will get blackholed).
connected(context);
}
#ifdef STATIC
#error "FWUp is not ready for static linkage. Nobody needed it."
#else
......@@ -456,6 +461,7 @@ struct plugin *plugin_info(void) {
.uplink_data_callback = communicate,
.uplink_connected_callback = connected,
.fd_callback = queue_fd_data,
.child_died_callback = child_died,
.imports = imports
};
return &plugin;
......
......@@ -31,20 +31,26 @@
#include <time.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/wait.h>
#define QUEUE_FLUSH_TIME 5000
#define QUEUE_RETRY_TIME 2000
/*
 * State of the queue that feeds commands to the external ipset command.
 */
struct queue {
	/*
	 * active: cleared by lost() when the pipe is closed; enqueue() starts
	 * the command when it is not set.
	 * timeout_started: a timeout identified by timeout_id is pending and
	 * must be cancelled when the queue is lost.
	 */
	bool active, timeout_started;
	// Set when the command was lost with an error; commands are not queued until the retry timeout clears it.
	bool broken;
	/*
	 * ID of the retry timeout as returned by loop_timeout_add. It must have
	 * the same type as timeout_id; a bool would collapse every non-zero
	 * ID to 1 and lose the actual value.
	 */
	size_t broken_timeout_id;
	// File descriptor registered with the loop for talking to the ipset command.
	int ipset_pipe;
	// PID of the ipset command, matched against terminated children.
	pid_t pid;
	// ID of the timeout guarded by timeout_started.
	size_t timeout_id;
	// Called from the retry timeout to re-fill the ipsets after a failure.
	reload_callback_t reload_callback;
};
struct queue *queue_alloc(struct context *context) {
struct queue *queue_alloc(struct context *context, reload_callback_t reload_callback) {
struct queue *result = mem_pool_alloc(context->permanent_pool, sizeof *result);
*result = (struct queue) {
.active = false
.reload_callback = reload_callback
};
return result;
}
......@@ -88,20 +94,50 @@ static void start(struct context *context, struct queue *queue) {
}
}
/*
 * Timeout callback fired some time after the queue was marked broken.
 * Clears the broken state and asks the owner (through the reload callback)
 * to fill the ipsets again. The data parameter carries the queue.
 */
static void retry_timeout(struct context *context, void *data, size_t id __attribute__((unused))) {
	struct queue *q = data;
	sanity(!q->timeout_started, "Timeout started and retry timeout fired\n");
	ulog(LLOG_WARN, "Trying to re-fill IPsets now\n");
	// The queue is usable again from now on; forget the retry timeout too.
	q->broken = false;
	q->broken_timeout_id = 0;
	sanity(q->reload_callback, "The reload callback is NULL\n");
	(q->reload_callback)(context);
}
static void lost(struct context *context, struct queue *queue, bool error) {
sanity(queue->active, "Lost inactive queue\n");
if (error)
ulog(LLOG_WARN, "Lost connection to ipset command %d, data may be out of sync\n", queue->pid);
else
ulog(LLOG_DEBUG, "Closing ipset subcommand\n");
loop_plugin_unregister_fd(context, queue->ipset_pipe);
sanity(close(queue->ipset_pipe) == 0, "Error closing the ipset pipe: %s\n", strerror(errno));
queue->ipset_pipe = 0;
queue->active = false;
queue->pid = 0;
if (queue->timeout_started) {
queue->timeout_started = false;
loop_timeout_cancel(context->loop, queue->timeout_id);
if (queue->broken)
// Already lost, don't do it again.
return;
/*
* In case we got EOF before and erroneous termination of the command later,
* we do not need to deactivate, close the pipe and such. But we still
* want to mark it as broken, start the retry timeout and re-synchronize.
*
* If the termination comes sooner than EOF (which is likely, but probably not
* guaranteed), then we mark it broken & inactive in one go and will not
* enter the routine once again.
*/
if (queue->active) {
// Deactivate
if (error)
ulog(LLOG_WARN, "Lost connection to ipset command %d, data may be out of sync\n", queue->pid);
else
ulog(LLOG_DEBUG, "Closing ipset subcommand\n");
loop_plugin_unregister_fd(context, queue->ipset_pipe);
sanity(close(queue->ipset_pipe) == 0, "Error closing the ipset pipe: %s\n", strerror(errno));
queue->ipset_pipe = 0;
queue->active = false;
queue->pid = 0;
if (queue->timeout_started) {
queue->timeout_started = false;
loop_timeout_cancel(context->loop, queue->timeout_id);
}
} else if (error)
ulog(LLOG_WARN, "IPset command considered broken post-morten\n");
if (error) {
queue->broken = true;
queue->broken_timeout_id = loop_timeout_add(context->loop, QUEUE_RETRY_TIME, context, queue, retry_timeout);
}
}
......@@ -112,6 +148,10 @@ static void flush_timeout(struct context *context, void *data, size_t id __attri
}
void enqueue(struct context *context, struct queue *queue, const char *command) {
if (queue->broken) {
ulog(LLOG_DEBUG_VERBOSE, "Not queueing command '%s', the queue is currently broken\n", command);
return;
}
if (!queue->active)
start(context, queue);
sanity(queue->active, "Failed to start the queue\n");
......@@ -195,3 +235,25 @@ void queue_fd_data(struct context *context, int fd, void *userdata) {
return;
}
}
/*
 * Examine a terminated child of ucollect. If it is the ipset command of this
 * queue, log how it ended and report the queue as lost; the loss counts as an
 * error unless the command exited with status 0.
 */
void queue_child_died(struct context *context, int state, pid_t child, struct queue *queue) {
	// Without an active queue there is no child of ours; with one, only its pid is of interest.
	if (!queue->active || queue->pid != child)
		return;
	bool failed;
	if (WIFEXITED(state) && WEXITSTATUS(state) == 0) {
		ulog(LLOG_DEBUG, "The ipset command %d terminated successfully\n", (int)child);
		failed = false;
	} else {
		failed = true;
		if (WIFEXITED(state)) {
			int exit_code = WEXITSTATUS(state);
			ulog(LLOG_ERROR, "The ipset command %d terminated with status %d\n", (int)child, exit_code);
		} else if (WIFSIGNALED(state)) {
			int signum = WTERMSIG(state);
			ulog(LLOG_ERROR, "The ipset command %d terminated with signal %d\n", (int)child, signum);
		} else {
			ulog(LLOG_ERROR, "The ipset command %d died for unknown reason, call the police to investigate\n", (int)child);
		}
	}
	lost(context, queue, failed);
}
......@@ -20,10 +20,14 @@
#ifndef UCOLLECT_FWUP_QUEUE_H
#define UCOLLECT_FWUP_QUEUE_H
#include <unistd.h>
struct queue;
struct context;
typedef void (*reload_callback_t)(struct context *context);
/*
* Create a queue for the commands. It will manage
* running the ipset command and feed commands to it.
......@@ -35,8 +39,12 @@ struct context;
* The ipset command is launched on-demand when data are
* sent to it. It is stopped either by explicit flush or
* by a short timeout.
*
* The reload callback is used whenever the ipset command dies
* with an error. The queue is disabled for a while and after
* a short time, it is retried.
*/
struct queue *queue_alloc(struct context *context) __attribute__((nonnull)) __attribute__((malloc)) __attribute__((returns_nonnull));
struct queue *queue_alloc(struct context *context, reload_callback_t reload_callback) __attribute__((nonnull)) __attribute__((malloc)) __attribute__((returns_nonnull));
/*
* Enqueue another command. The ipset command is launched
* or previous one is reused. Due to internal OS buffering,
......@@ -55,4 +63,10 @@ void queue_flush(struct context *context, struct queue *queue) __attribute__((no
*/
void queue_fd_data(struct context *context, int fd, void *userdata) __attribute__((nonnull));
/*
* Callback when some child of ucollect dies. Examines it to see if it
* is the ipset command and if so, if it terminated with error or successfully.
*/
void queue_child_died(struct context *context, int status, pid_t pid, struct queue *queue) __attribute__((nonnull));
#endif
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment