Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Restart daemons after updating, gmond hash table locking (for release 3.6.X) #169

Open
wants to merge 5 commits into
base: release/3.6
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ script: "./bootstrap && ./configure --with-gmetad --without-php --with-perl=/usr
before_script:
- echo 'yes' | sudo add-apt-repository ppa:ondrej/php5
- sudo apt-get update
- sudo apt-get install -y libapr1-dev libconfuse-dev libexpat1-dev libpcre3-dev libssl-dev librrd-dev libperl-dev libmemcached-dev libtool m4 gperf zlib1g-dev
- sudo apt-get -o Dpkg::Options::="--force-overwrite" install -y libapr1-dev libconfuse-dev libexpat1-dev libpcre3-dev libssl-dev librrd-dev libperl-dev libmemcached-dev libtool m4 gperf zlib1g-dev
14 changes: 13 additions & 1 deletion ganglia.spec.in
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ gmetad packages
%build
%configure --with-gmetad --enable-status --sysconfdir=%{conf_dir}
%ifnarch noarch
make
%__make
%endif
#cd gmetad-python
#%{__python} setup.py build
Expand Down Expand Up @@ -247,6 +247,12 @@ then
/sbin/chkconfig --del gmetad
fi

%postun gmetad
# Runs after the old package's files are removed. "$1" is the number of
# installed instances left, so a value of 1 or more means this is an upgrade
# rather than a final erase.
if [ "$1" -ge 1 ]
then
# Restart gmetad only if it is already running so it picks up the new binary.
# The trailing "|| :" keeps a missing or failing init script from making the
# scriptlet (and therefore the rpm transaction) report an error.
/etc/init.d/gmetad condrestart >/dev/null 2>&1 || :
fi

#%preun gmetad-python
#if [ "$1" = 0 ]
#then
Expand All @@ -261,6 +267,12 @@ then
/sbin/chkconfig --del gmond
fi

%postun gmond
# Runs after the old package's files are removed. "$1" is the number of
# installed instances left, so a value of 1 or more means this is an upgrade
# rather than a final erase.
if [ "$1" -ge 1 ]
then
# Restart gmond only if it is already running so it picks up the new binary.
# The trailing "|| :" keeps a missing or failing init script from making the
# scriptlet (and therefore the rpm transaction) report an error.
/etc/init.d/gmond condrestart >/dev/null 2>&1 || :
fi

%post -n libganglia -p /sbin/ldconfig

%postun -n libganglia -p /sbin/ldconfig
Expand Down
109 changes: 76 additions & 33 deletions gmetad/gmetad.init
Original file line number Diff line number Diff line change
Expand Up @@ -11,39 +11,82 @@ test -f /etc/sysconfig/gmetad && . /etc/sysconfig/gmetad

export RRDCACHED_ADDRESS

RETVAL=0
# Start the gmetad daemon.
#
# Exits the script with status 5 when the $GMETAD binary is not executable and
# with status 6 when /etc/ganglia/gmetad.conf is missing (the LSB codes for
# "program is not installed" and "program is not configured").
# On a successful launch the subsystem lock file is created.
# Returns the exit status of `daemon`, which is also left in $RETVAL.
start() {
    # Quote the variable: with an empty/unset $GMETAD the unquoted form
    # collapses to `[ -x ]`, which is true, and the check would be bypassed.
    [ -x "$GMETAD" ] || exit 5
    [ -f /etc/ganglia/gmetad.conf ] || exit 6
    echo -n "Starting GANGLIA gmetad: "
    daemon $GMETAD
    RETVAL=$?
    echo
    [ $RETVAL -eq 0 ] && touch /var/lock/subsys/gmetad
    return $RETVAL
}

# Stop the gmetad daemon via `killproc`.
# When the kill succeeds the subsystem lock file is removed.
# Returns the exit status of `killproc`, which is also left in $RETVAL.
stop() {
    echo -n "Shutting down GANGLIA gmetad: "
    killproc $GMETAD
    RETVAL=$?
    echo
    if [ $RETVAL -eq 0 ]; then
        rm -f /var/lock/subsys/gmetad
    fi
    return $RETVAL
}

# Stop gmetad, then start it again unconditionally.
# The status of `stop` is ignored; the function returns whatever `start` returns.
restart() {
    stop
    start
}

# "reload" action: implemented here as a full restart.
reload() {
    restart
}

# "force-reload" action: implemented here as a full restart.
force_reload() {
    restart
}

# Report whether gmetad is running by delegating to the `status` helper
# (presumably provided by the distribution's init function library).
# Returns the exit status of `status`.
rh_status() {
    status $GMETAD
}

# Quiet variant of rh_status: same exit status, but stdout and stderr are
# discarded so it can be used purely as a running/not-running test.
rh_status_q() {
    rh_status > /dev/null 2>&1
}

# Print the list of actions this init script accepts.
usage() {
    echo "Usage: $0 {start|stop|status|restart|condrestart|try-restart|reload|force-reload}"
}

case "$1" in
start)
echo -n "Starting GANGLIA gmetad: "
[ -f $GMETAD ] || exit 1

daemon $GMETAD
RETVAL=$?
echo
[ $RETVAL -eq 0 ] && touch /var/lock/subsys/gmetad
;;

stop)
echo -n "Shutting down GANGLIA gmetad: "
killproc $GMETAD
RETVAL=$?
echo
[ $RETVAL -eq 0 ] && rm -f /var/lock/subsys/gmetad
;;

restart|reload)
$0 stop
$0 start
RETVAL=$?
;;
status)
status $GMETAD
RETVAL=$?
;;
*)
echo "Usage: $0 {start|stop|restart|status}"
exit 1
start)
rh_status_q && exit 0
$1
;;
stop)
rh_status_q || exit 1
$1
;;
restart)
$1
;;
reload)
rh_status_q || exit 7
$1
;;
force-reload)
force_reload
;;
status)
rh_status
;;
condrestart|try-restart)
rh_status_q || exit 0
restart
;;
usage)
$1
;;
*)
usage
exit 2
esac

exit $RETVAL
exit $?
48 changes: 31 additions & 17 deletions gmond/gmond.c
Original file line number Diff line number Diff line change
Expand Up @@ -1032,7 +1032,9 @@ Ganglia_host_get( char *remIP, apr_sockaddr_t *sa, Ganglia_metric_id *metric_id)
remoteip = spoofIP;
}

apr_thread_mutex_lock(hosts_mutex);
hostdata = (Ganglia_host *)apr_hash_get( hosts, remoteip, APR_HASH_KEY_STRING );
apr_thread_mutex_unlock(hosts_mutex);
if(!hostdata)
{
/* Lookup the hostname or use the proxy information if available */
Expand Down Expand Up @@ -1173,8 +1175,11 @@ Ganglia_metadata_check(Ganglia_host *host, Ganglia_value_msg *vmsg )
{
char *metric_name = vmsg->Ganglia_value_msg_u.gstr.metric_id.name;
int is_spoof_msg = vmsg->Ganglia_value_msg_u.gstr.metric_id.spoof;
Ganglia_metadata *metric =
(Ganglia_metadata *)apr_hash_get(host->metrics, metric_name, APR_HASH_KEY_STRING);
Ganglia_metadata *metric;

apr_thread_mutex_lock(host->mutex);
metric = (Ganglia_metadata *)apr_hash_get(host->metrics, metric_name, APR_HASH_KEY_STRING);
apr_thread_mutex_unlock(host->mutex);

if(!metric)
{
Expand Down Expand Up @@ -1224,14 +1229,19 @@ Ganglia_metadata_free( Ganglia_metadata *metric )
void
Ganglia_metadata_save( Ganglia_host *host, Ganglia_metadata_msg *message )
{
/* Search for the Ganglia_metadata in the Ganglia_host */
sanitize_metric_name(message->Ganglia_metadata_msg_u.gfull.metric_id.name, message->Ganglia_metadata_msg_u.gfull.metric_id.spoof);
Ganglia_metadata *metric =
(Ganglia_metadata *)apr_hash_get(host->metrics,
message->Ganglia_metadata_msg_u.gfull.metric_id.name,
APR_HASH_KEY_STRING);
Ganglia_metadata *metric;

if(!host || !message)
return;

/* Search for the Ganglia_metadata in the Ganglia_host */
sanitize_metric_name(message->Ganglia_metadata_msg_u.gfull.metric_id.name, message->Ganglia_metadata_msg_u.gfull.metric_id.spoof);

apr_thread_mutex_lock(host->mutex);
metric = (Ganglia_metadata *)apr_hash_get(host->metrics,
message->Ganglia_metadata_msg_u.gfull.metric_id.name,
APR_HASH_KEY_STRING);
apr_thread_mutex_unlock(host->mutex);

if(metric)
{
Expand Down Expand Up @@ -1334,14 +1344,18 @@ Ganglia_metadata_request( Ganglia_host *host, Ganglia_metadata_msg *message )
void
Ganglia_value_save( Ganglia_host *host, Ganglia_value_msg *message )
{
/* Search for the Ganglia_metric in the Ganglia_host */
Ganglia_metadata *metric =
(Ganglia_metadata *)apr_hash_get( host->gmetrics,
message->Ganglia_value_msg_u.gstr.metric_id.name,
APR_HASH_KEY_STRING);
Ganglia_metadata *metric;

if(!host || !message)
return;

/* Search for the Ganglia_metric in the Ganglia_host */
apr_thread_mutex_lock(host->mutex);
metric = (Ganglia_metadata *)apr_hash_get(host->gmetrics,
message->Ganglia_value_msg_u.gstr.metric_id.name,
APR_HASH_KEY_STRING);
apr_thread_mutex_unlock(host->mutex);

if(metric)
{
apr_pool_clear(metric->pool);
Expand Down Expand Up @@ -3088,6 +3102,7 @@ cleanup_data( apr_pool_t *pool, apr_time_t now)
apr_hash_index_t *hi, *metric_hi;

/* Walk the host hash */
apr_thread_mutex_lock(hosts_mutex);
for(hi = apr_hash_first(pool, hosts);
hi;
hi = apr_hash_next(hi))
Expand All @@ -3102,15 +3117,14 @@ cleanup_data( apr_pool_t *pool, apr_time_t now)
/* this host is older than dmax... delete it */
debug_msg("deleting old host '%s' from host hash'", host->hostname);
/* remove it from the hash */
apr_thread_mutex_lock(hosts_mutex);
apr_hash_set( hosts, host->ip, APR_HASH_KEY_STRING, NULL);
apr_thread_mutex_unlock(hosts_mutex);
/* free all its memory */
apr_pool_destroy( host->pool);
}
else
{
/* this host isn't being deleted but it might have some stale gmetric data */
apr_thread_mutex_lock(host->mutex);
for( metric_hi = apr_hash_first( pool, host->metrics );
metric_hi;
metric_hi = apr_hash_next( metric_hi ))
Expand All @@ -3132,16 +3146,16 @@ cleanup_data( apr_pool_t *pool, apr_time_t now)
debug_msg("deleting old metric '%s' from host '%s'", metric->name, host->hostname);

/* remove the metric from the metric and values hash */
apr_thread_mutex_lock(host->mutex);
apr_hash_set( host->metrics, metric->name, APR_HASH_KEY_STRING, NULL);
apr_hash_set( host->gmetrics, metric->name, APR_HASH_KEY_STRING, NULL);
apr_thread_mutex_unlock(host->mutex);
/* destroy any memory that was allocated for this gmetric */
apr_pool_destroy( metric->pool );
}
}
apr_thread_mutex_unlock(host->mutex);
}
}
apr_thread_mutex_unlock(hosts_mutex);

apr_pool_clear( pool );
}
Expand Down
101 changes: 72 additions & 29 deletions gmond/gmond.init
Original file line number Diff line number Diff line change
Expand Up @@ -11,39 +11,82 @@ export TMPDIR

. /etc/rc.d/init.d/functions

RETVAL=0
# Start the gmond daemon.
#
# Exits the script with status 5 when the $GMOND binary is not executable and
# with status 6 when /etc/ganglia/gmond.conf is missing (the LSB codes for
# "program is not installed" and "program is not configured").
# On a successful launch the subsystem lock file is created.
# Returns the exit status of `daemon`, which is also left in $RETVAL.
start() {
    # Quote the variable: with an empty/unset $GMOND the unquoted form
    # collapses to `[ -x ]`, which is true, and the check would be bypassed.
    [ -x "$GMOND" ] || exit 5
    [ -f /etc/ganglia/gmond.conf ] || exit 6
    echo -n "Starting GANGLIA gmond: "
    daemon $GMOND
    RETVAL=$?
    echo
    [ $RETVAL -eq 0 ] && touch /var/lock/subsys/gmond
    return $RETVAL
}

case "$1" in
start)
echo -n "Starting GANGLIA gmond: "
[ -f $GMOND ] || exit 1
# Stop the gmond daemon via `killproc`.
# When the kill succeeds the subsystem lock file is removed.
# Returns the exit status of `killproc`, which is also left in $RETVAL.
# NOTE(review): this targets the process name "gmond", whereas start() and
# rh_status() use $GMOND; confirm the two always refer to the same program.
stop() {
    echo -n "Shutting down GANGLIA gmond: "
    killproc gmond
    RETVAL=$?
    echo
    if [ $RETVAL -eq 0 ]; then
        rm -f /var/lock/subsys/gmond
    fi
    return $RETVAL
}

daemon $GMOND
RETVAL=$?
echo
[ $RETVAL -eq 0 ] && touch /var/lock/subsys/gmond
;;
# Stop gmond, then start it again unconditionally.
# The status of `stop` is ignored; the function returns whatever `start` returns.
restart() {
    stop
    start
}

stop)
echo -n "Shutting down GANGLIA gmond: "
killproc gmond
RETVAL=$?
echo
[ $RETVAL -eq 0 ] && rm -f /var/lock/subsys/gmond
;;
# "reload" action: implemented here as a full restart.
reload() {
    restart
}

# "force-reload" action: implemented here as a full restart.
force_reload() {
    restart
}

restart|reload)
$0 stop
$0 start
RETVAL=$?
# Report whether gmond is running by delegating to the `status` helper
# (presumably provided by /etc/rc.d/init.d/functions, which this script sources).
# Returns the exit status of `status`.
rh_status() {
    status $GMOND
}

# Quiet variant of rh_status: same exit status, but stdout and stderr are
# discarded so it can be used purely as a running/not-running test.
rh_status_q() {
    rh_status > /dev/null 2>&1
}

# Print the list of actions this init script accepts.
usage() {
    echo "Usage: $0 {start|stop|status|restart|condrestart|try-restart|reload|force-reload}"
}

case "$1" in
start)
rh_status_q && exit 0
$1
;;
status)
status gmond
RETVAL=$?
stop)
rh_status_q || exit 1
$1
;;
*)
echo "Usage: $0 {start|stop|restart|status}"
exit 1
restart)
$1
;;
reload)
rh_status_q || exit 7
$1
;;
force-reload)
force_reload
;;
status)
rh_status
;;
condrestart|try-restart)
rh_status_q || exit 0
restart
;;
usage)
$1
;;
*)
usage
exit 2
esac

exit $RETVAL
exit $?