2008-08-27 Dan Williams <dcbw@redhat.com>

Ensure zombie children get cleaned up.  To get notifications when children
	die abnormally, g_spawn_async() requires G_SPAWN_DO_NOT_REAP_CHILD, but
	that requires calling waitpid() yourself if you've removed the child watch
	handler before the process has actually died, which NM needs to do in a few
	places.  So ensure that everything uses G_SPAWN_DO_NOT_REAP_CHILD and also
	cleans up after the child when required.  Should fix problems trying to
	activate mobile broadband connections after a previous failure.

	* src/dhcp-manager/nm-dhcp-dhclient.c
	  src/dhcp-manager/nm-dhcp-dhcpcd.c
		- Use G_SPAWN_DO_NOT_REAP_CHILD

	* src/dhcp-manager/nm-dhcp-manager.c
		- (nm_dhcp_device_destroy): ensure child is cleaned up
		- (nm_dhcp_client_stop, nm_dhcp_manager_cancel_transaction_real): always
			block on child quitting, since the non-blocking functionality was
			never actually used

	* src/dnsmasq-manager/nm-dnsmasq-manager.c
		- (dm_watch_cb): child is already reaped here
		- (ensure_killed, nm_dnsmasq_manager_stop): block until child is dead

	* src/nm-device.c
		- (aipd_cleanup): block until child is dead

	* src/named-manager/nm-named-manager.c
		- (run_netconfig): don't use G_SPAWN_DO_NOT_REAP_CHILD if we aren't
			event bothering to watch the child

	* src/ppp-manager/nm-ppp-manager.c
		- (ppp_watch_cb): child is already reaped here
		- (ensure_killed, nm_ppp_manager_stop): block until child is dead

	* src/vpn-manager/nm-vpn-service.c
		- (vpn_service_watch_cb): child is already reaped here
		- (nm_vpn_service_daemon_exec): use G_SPAWN_DO_NOT_REAP_CHILD so that
			status of the child is actually tracked
		- (ensure_killed, finalize): block until child is dead



git-svn-id: http://svn-archive.gnome.org/svn/NetworkManager/trunk@4020 4912f4e0-d625-0410-9fb7-b9a5a253dbdc
This commit is contained in:
Dan Williams
2008-08-27 17:22:32 +00:00
parent 61eb8cf067
commit 7466136be2
10 changed files with 149 additions and 84 deletions

View File

@@ -1,3 +1,44 @@
2008-08-27 Dan Williams <dcbw@redhat.com>
Ensure zombie children get cleaned up. To get notifications when children
die abnormally, g_spawn_async() requires G_SPAWN_DO_NOT_REAP_CHILD, but
that requires calling waitpid() yourself if you've removed the child watch
handler before the process has actually died, which NM needs to do in a few
places. So ensure that everything uses G_SPAWN_DO_NOT_REAP_CHILD and also
cleans up after the child when required. Should fix problems trying to
activate mobile broadband connections after a previous failure.
* src/dhcp-manager/nm-dhcp-dhclient.c
src/dhcp-manager/nm-dhcp-dhcpcd.c
- Use G_SPAWN_DO_NOT_REAP_CHILD
* src/dhcp-manager/nm-dhcp-manager.c
- (nm_dhcp_device_destroy): ensure child is cleaned up
- (nm_dhcp_client_stop, nm_dhcp_manager_cancel_transaction_real): always
block on child quitting, since the non-blocking functionality was
never actually used
* src/dnsmasq-manager/nm-dnsmasq-manager.c
- (dm_watch_cb): child is already reaped here
- (ensure_killed, nm_dnsmasq_manager_stop): block until child is dead
* src/nm-device.c
- (aipd_cleanup): block until child is dead
* src/named-manager/nm-named-manager.c
- (run_netconfig): don't use G_SPAWN_DO_NOT_REAP_CHILD if we aren't
event bothering to watch the child
* src/ppp-manager/nm-ppp-manager.c
- (ppp_watch_cb): child is already reaped here
- (ensure_killed, nm_ppp_manager_stop): block until child is dead
* src/vpn-manager/nm-vpn-service.c
- (vpn_service_watch_cb): child is already reaped here
- (nm_vpn_service_daemon_exec): use G_SPAWN_DO_NOT_REAP_CHILD so that
status of the child is actually tracked
- (ensure_killed, finalize): block until child is dead
2008-08-26 Dan Williams <dcbw@redhat.com>
* system-settings/plugins/keyfile/nm-keyfile-connection.c

View File

@@ -236,7 +236,7 @@ nm_dhcp_client_start (NMDHCPDevice *device, NMSettingIP4Config *s_ip4)
unsigned long int tmp = strtoul (pid_contents, NULL, 10);
if (!((tmp == ULONG_MAX) && (errno == ERANGE)))
nm_dhcp_client_stop (device->iface, (pid_t) tmp, TRUE);
nm_dhcp_client_stop (device->iface, (pid_t) tmp);
remove (device->pid_file);
}
@@ -260,7 +260,7 @@ nm_dhcp_client_start (NMDHCPDevice *device, NMSettingIP4Config *s_ip4)
g_ptr_array_add (dhclient_argv, (gpointer) device->iface);
g_ptr_array_add (dhclient_argv, NULL);
if (!g_spawn_async (NULL, (char **) dhclient_argv->pdata, NULL, 0,
if (!g_spawn_async (NULL, (char **) dhclient_argv->pdata, NULL, G_SPAWN_DO_NOT_REAP_CHILD,
&dhclient_child_setup, NULL, &pid, &error)) {
nm_warning ("dhclient failed to start. error: '%s'", error->message);
g_error_free (error);

View File

@@ -83,7 +83,7 @@ nm_dhcp_client_start (NMDHCPDevice *device, NMSettingIP4Config *s_ip4)
unsigned long int tmp = strtoul (pid_contents, NULL, 10);
if (!((tmp == ULONG_MAX) && (errno == ERANGE)))
nm_dhcp_client_stop (device->iface, (pid_t) tmp, TRUE);
nm_dhcp_client_stop (device->iface, (pid_t) tmp);
remove (device->pid_file);
}
@@ -102,7 +102,7 @@ nm_dhcp_client_start (NMDHCPDevice *device, NMSettingIP4Config *s_ip4)
g_ptr_array_add (argv, (gpointer) device->iface);
g_ptr_array_add (argv, NULL);
if (!g_spawn_async (NULL, (char **) argv->pdata, NULL, 0,
if (!g_spawn_async (NULL, (char **) argv->pdata, NULL, G_SPAWN_DO_NOT_REAP_CHILD,
&dhcpcd_child_setup, NULL, &pid, &error)) {
nm_warning ("dhcpcd failed to start. error: '%s'", error->message);
g_error_free (error);

View File

@@ -68,7 +68,7 @@ static guint signals[LAST_SIGNAL] = { 0 };
static NMDHCPManager *nm_dhcp_manager_new (void);
static void nm_dhcp_manager_cancel_transaction_real (NMDHCPDevice *device, gboolean blocking);
static void nm_dhcp_manager_cancel_transaction_real (NMDHCPDevice *device);
NMDHCPManager *
nm_dhcp_manager_get (void)
@@ -171,6 +171,9 @@ nm_dhcp_device_destroy (NMDHCPDevice *device)
nm_dhcp_device_timeout_cleanup (device);
nm_dhcp_device_watch_cleanup (device);
if (device->pid)
nm_dhcp_client_stop (device->iface, device->pid);
if (device->options)
g_hash_table_destroy (device->options);
@@ -525,8 +528,6 @@ nm_dhcp_device_new (NMDHCPManager *manager, const char *iface)
device->manager = manager;
nm_dhcp_manager_cancel_transaction_real (device, FALSE);
/* Do this after the transaction cancel since that clears options out */
device->options = g_hash_table_new_full (g_str_hash,
g_str_equal,
@@ -590,7 +591,7 @@ nm_dhcp_manager_begin_transaction (NMDHCPManager *manager,
if (state_is_bound (device->state) || (device->state == DHC_START)) {
/* Cancel any DHCP transaction already in progress */
nm_dhcp_manager_cancel_transaction_real (device, TRUE);
nm_dhcp_manager_cancel_transaction_real (device);
}
nm_info ("Activation (%s) Beginning DHCP transaction.", iface);
@@ -611,26 +612,26 @@ nm_dhcp_manager_begin_transaction (NMDHCPManager *manager,
}
void
nm_dhcp_client_stop (const char * iface,
pid_t pid,
gboolean blocking)
nm_dhcp_client_stop (const char * iface, pid_t pid)
{
int i = 20; /* 4 seconds */
int i = 15; /* 3 seconds */
/* Tell it to quit */
/* Tell it to quit; maybe it wants to send out a RELEASE message */
kill (pid, SIGTERM);
while (blocking && i-- > 0) {
while (i-- > 0) {
gint child_status;
int ret;
ret = waitpid (pid, &child_status, WNOHANG);
if (ret > 0) {
if (ret > 0)
break;
} else if (ret == -1) {
if (ret == -1) {
/* Child already exited */
if (errno == ECHILD)
break;
/* Otherwise, force kill the process */
/* Took too long; shoot it in the head */
i = 0;
break;
}
@@ -640,16 +641,20 @@ nm_dhcp_client_stop (const char * iface,
if (i <= 0) {
nm_warning ("%s: dhcp client pid %d didn't exit, will kill it.", iface, pid);
kill (pid, SIGKILL);
nm_debug ("waiting for dhcp client pid %d to exit", pid);
waitpid (pid, NULL, 0);
nm_debug ("dhcp client pid %d cleaned up", pid);
}
}
static void
nm_dhcp_manager_cancel_transaction_real (NMDHCPDevice *device, gboolean blocking)
nm_dhcp_manager_cancel_transaction_real (NMDHCPDevice *device)
{
if (!device->pid)
return;
nm_dhcp_client_stop (device->iface, device->pid, blocking);
nm_dhcp_client_stop (device->iface, device->pid);
nm_info ("%s: canceled DHCP transaction, dhcp client pid %d",
device->iface,
@@ -705,7 +710,7 @@ nm_dhcp_manager_cancel_transaction (NMDHCPManager *manager,
if (!device || !device->pid)
return;
nm_dhcp_manager_cancel_transaction_real (device, TRUE);
nm_dhcp_manager_cancel_transaction_real (device);
}

View File

@@ -102,8 +102,6 @@ gboolean nm_dhcp_manager_set_dhcp4_config (NMDHCPManager *manager,
gboolean nm_dhcp_manager_process_signal (NMDHCPManager *manager, DBusMessage *message);
gboolean nm_dhcp_client_start (NMDHCPDevice *device, NMSettingIP4Config *s_ip4);
void nm_dhcp_client_stop (const char * iface,
pid_t pid,
gboolean blocking);
void nm_dhcp_client_stop (const char *iface, pid_t pid);
#endif /* NM_DHCP_MANAGER_H */

View File

@@ -216,9 +216,6 @@ dm_watch_cb (GPid pid, gint status, gpointer user_data)
else
g_warning ("dnsmasq died from an unknown cause");
/* Reap child if needed. */
waitpid (pid, NULL, WNOHANG);
priv->pid = 0;
g_signal_emit (manager, signals[STATE_CHANGED], 0, NM_DNSMASQ_STATUS_DEAD);
@@ -411,8 +408,10 @@ ensure_killed (gpointer data)
if (kill (pid, 0) == 0)
kill (pid, SIGKILL);
/* ensure child is reaped */
waitpid (pid, NULL, WNOHANG);
/* ensure the child is reaped */
nm_debug ("waiting for ppp pid %d to exit", pid);
waitpid (pid, NULL, 0);
nm_debug ("ppp pid %d cleaned up", pid);
return FALSE;
}
@@ -434,11 +433,15 @@ nm_dnsmasq_manager_stop (NMDnsMasqManager *manager)
if (priv->pid) {
if (kill (priv->pid, SIGTERM) == 0)
g_timeout_add (2000, ensure_killed, GINT_TO_POINTER (priv->pid));
else
else {
kill (priv->pid, SIGKILL);
/* ensure child is reaped */
waitpid (priv->pid, NULL, WNOHANG);
/* ensure the child is reaped */
nm_debug ("waiting for ppp pid %d to exit", priv->pid);
waitpid (priv->pid, NULL, 0);
nm_debug ("ppp pid %d cleaned up", priv->pid);
}
priv->pid = 0;
}

View File

@@ -129,25 +129,18 @@ netconfig_child_setup (gpointer user_data G_GNUC_UNUSED)
static gint
run_netconfig (GError **error)
{
GPtrArray *argv;
char *argv[5];
gint stdin_fd;
argv = g_ptr_array_new ();
g_ptr_array_add (argv, "/sbin/netconfig");
g_ptr_array_add (argv, "modify");
g_ptr_array_add (argv, "--service");
g_ptr_array_add (argv, "NetworkManager");
g_ptr_array_add (argv, NULL);
argv[0] = "/sbin/netconfig";
argv[1] = "modify";
argv[2] = "--service";
argv[3] = "NetworkManager";
argv[4] = NULL;
if (!g_spawn_async_with_pipes (NULL, (char **) argv->pdata, NULL,
G_SPAWN_DO_NOT_REAP_CHILD,
netconfig_child_setup,
NULL, NULL, &stdin_fd,
NULL, NULL, error)) {
stdin_fd = -1;
}
g_ptr_array_free (argv, TRUE);
if (!g_spawn_async_with_pipes (NULL, argv, NULL, 0, netconfig_child_setup,
NULL, NULL, &stdin_fd, NULL, NULL, error))
return -1;
return stdin_fd;
}

View File

@@ -563,8 +563,12 @@ aipd_cleanup (NMDevice *self)
if (priv->aipd_pid > 0) {
kill (priv->aipd_pid, SIGKILL);
/* Ensure child is reaped */
waitpid (priv->aipd_pid, NULL, WNOHANG);
/* ensure the child is reaped */
nm_debug ("waiting for ppp pid %d to exit", priv->aipd_pid);
waitpid (priv->aipd_pid, NULL, 0);
nm_debug ("ppp pid %d cleaned up", priv->aipd_pid);
priv->aipd_pid = -1;
}

View File

@@ -663,6 +663,8 @@ ppp_watch_cb (GPid pid, gint status, gpointer user_data)
NMPPPManagerPrivate *priv = NM_PPP_MANAGER_GET_PRIVATE (manager);
guint err;
g_assert (pid == priv->pid);
if (WIFEXITED (status)) {
err = WEXITSTATUS (status);
if (err != 0)
@@ -674,12 +676,8 @@ ppp_watch_cb (GPid pid, gint status, gpointer user_data)
else
nm_warning ("ppp pid %d died from an unknown cause", priv->pid);
/* Reap child if needed. */
nm_debug ("ppp pid %d cleaned up", priv->pid);
waitpid (pid, NULL, WNOHANG);
priv->pid = 0;
g_signal_emit (manager, signals[STATE_CHANGED], 0, NM_PPP_STATUS_DEAD);
}
@@ -949,7 +947,8 @@ ensure_killed (gpointer data)
kill (pid, SIGKILL);
/* ensure the child is reaped */
waitpid (pid, NULL, WNOHANG);
nm_debug ("waiting for ppp pid %d to exit", pid);
waitpid (pid, NULL, 0);
nm_debug ("ppp pid %d cleaned up", pid);
return FALSE;
@@ -991,8 +990,10 @@ nm_ppp_manager_stop (NMPPPManager *manager)
g_timeout_add (2000, ensure_killed, GINT_TO_POINTER (priv->pid));
else {
kill (priv->pid, SIGKILL);
/* ensure the child is reaped */
waitpid (priv->pid, NULL, WNOHANG);
nm_debug ("waiting for ppp pid %d to exit", priv->pid);
waitpid (priv->pid, NULL, 0);
nm_debug ("ppp pid %d cleaned up", priv->pid);
}

View File

@@ -45,6 +45,7 @@ typedef struct {
GPid pid;
GSList *connections;
guint service_start_timeout;
guint service_child_watch;
gulong name_owner_id;
} NMVPNServicePrivate;
@@ -207,9 +208,8 @@ vpn_service_watch_cb (GPid pid, gint status, gpointer user_data)
nm_warning ("VPN service '%s' died from an unknown cause",
nm_vpn_service_get_name (service));
/* Reap child if needed. */
waitpid (pid, NULL, WNOHANG);
priv->pid = 0;
priv->service_child_watch = 0;
nm_vpn_service_connections_stop (service, TRUE, NM_VPN_CONNECTION_STATE_REASON_SERVICE_STOPPED);
}
@@ -232,8 +232,7 @@ static gboolean
nm_vpn_service_daemon_exec (NMVPNService *service, GError **error)
{
NMVPNServicePrivate *priv = NM_VPN_SERVICE_GET_PRIVATE (service);
GPtrArray *vpn_argv;
gboolean launched;
char *vpn_argv[2];
gboolean success = FALSE;
GError *spawn_error = NULL;
@@ -241,42 +240,28 @@ nm_vpn_service_daemon_exec (NMVPNService *service, GError **error)
g_return_val_if_fail (error != NULL, FALSE);
g_return_val_if_fail (*error == NULL, FALSE);
vpn_argv = g_ptr_array_new ();
g_ptr_array_add (vpn_argv, priv->program);
g_ptr_array_add (vpn_argv, NULL);
vpn_argv[0] = priv->program;
vpn_argv[1] = NULL;
launched = g_spawn_async (NULL,
(char **) vpn_argv->pdata,
NULL,
0,
nm_vpn_service_child_setup,
NULL,
&priv->pid,
success = g_spawn_async (NULL, vpn_argv, NULL, G_SPAWN_DO_NOT_REAP_CHILD,
nm_vpn_service_child_setup, NULL, &priv->pid,
&spawn_error);
g_ptr_array_free (vpn_argv, TRUE);
if (launched) {
GSource *vpn_watch;
vpn_watch = g_child_watch_source_new (priv->pid);
g_source_set_callback (vpn_watch, (GSourceFunc) vpn_service_watch_cb, service, NULL);
g_source_attach (vpn_watch, NULL);
g_source_unref (vpn_watch);
if (success) {
nm_info ("VPN service '%s' started (%s), PID %d",
nm_vpn_service_get_name (service), priv->dbus_service, priv->pid);
priv->service_child_watch = g_child_watch_add (priv->pid, vpn_service_watch_cb, service);
priv->service_start_timeout = g_timeout_add (5000, nm_vpn_service_timeout, service);
success = TRUE;
} else {
nm_warning ("VPN service '%s': could not launch the VPN service. error: (%d) %s.",
nm_vpn_service_get_name (service), spawn_error->code, spawn_error->message);
g_set_error (error,
NM_VPN_MANAGER_ERROR, NM_VPN_MANAGER_ERROR_SERVICE_START_FAILED,
"%s", spawn_error->message);
"%s", spawn_error ? spawn_error->message : "unknown g_spawn_async() error");
nm_vpn_service_connections_stop (service, TRUE, NM_VPN_CONNECTION_STATE_REASON_SERVICE_START_FAILED);
if (spawn_error)
g_error_free (spawn_error);
}
@@ -418,6 +403,22 @@ nm_vpn_service_init (NMVPNService *service)
service);
}
static gboolean
ensure_killed (gpointer data)
{
int pid = GPOINTER_TO_INT (data);
if (kill (pid, 0) == 0)
kill (pid, SIGKILL);
/* ensure the child is reaped */
nm_debug ("waiting for vpn service pid %d to exit", pid);
waitpid (pid, NULL, 0);
nm_debug ("vpn service pid %d cleaned up", pid);
return FALSE;
}
static void
finalize (GObject *object)
{
@@ -431,6 +432,25 @@ finalize (GObject *object)
NM_VPN_CONNECTION_STATE_REASON_SERVICE_STOPPED);
g_signal_handler_disconnect (priv->dbus_mgr, priv->name_owner_id);
if (priv->service_child_watch)
g_source_remove (priv->service_child_watch);
if (priv->pid) {
if (kill (priv->pid, SIGTERM) == 0)
g_timeout_add (2000, ensure_killed, GINT_TO_POINTER (priv->pid));
else {
kill (priv->pid, SIGKILL);
/* ensure the child is reaped */
nm_debug ("waiting for vpn service pid %d to exit", priv->pid);
waitpid (priv->pid, NULL, 0);
nm_debug ("vpn service pid %d cleaned up", priv->pid);
}
priv->pid = 0;
}
g_object_unref (priv->dbus_mgr);
g_free (priv->name);