diff --git a/9781484200957.jpg b/9781484200957.jpg new file mode 100644 index 0000000..fc18255 Binary files /dev/null and b/9781484200957.jpg differ diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..9d8c332 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,27 @@ +Freeware License, some rights reserved + +Copyright (c) 2015 Michael Frampton + +Permission is hereby granted, free of charge, to anyone obtaining a copy +of this software and associated documentation files (the "Software"), +to work with the Software within the limits of freeware distribution and fair use. +This includes the rights to use, copy, and modify the Software for personal use. +Users are also allowed and encouraged to submit corrections and modifications +to the Software for the benefit of other users. + +It is not allowed to reuse, modify, or redistribute the Software for +commercial use in any way, or for a user’s educational materials such as books +or blog articles without prior permission from the copyright holder. + +The above copyright notice and this permission notice need to be included +in all copies or substantial portions of the software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS OR APRESS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + + diff --git a/README.md b/README.md new file mode 100644 index 0000000..b850e69 --- /dev/null +++ b/README.md @@ -0,0 +1,15 @@ +#Apress Source Code + +This repository accompanies [*Big Data Made Easy*](http://www.apress.com/9781484200957) by Michael Frampton (Apress, 2015). + +![Cover image](9781484200957.jpg) + +Download the files as a zip using the green button, or clone the repository to your machine using Git. + +##Releases + +Release v1.0 corresponds to the code in the published book, without corrections or updates. + +##Contributions + +See the file Contributing.md for more information on how you can contribute to this repository. diff --git a/contributing.md b/contributing.md new file mode 100644 index 0000000..f6005ad --- /dev/null +++ b/contributing.md @@ -0,0 +1,14 @@ +# Contributing to Apress Source Code + +Copyright for Apress source code belongs to the author(s). However, under fair use you are encouraged to fork and contribute minor corrections and updates for the benefit of the author(s) and other readers. + +## How to Contribute + +1. Make sure you have a GitHub account. +2. Fork the repository for the relevant book. +3. Create a new branch on which to make your change, e.g. +`git checkout -b my_code_contribution` +4. Commit your change. Include a commit message describing the correction. Please note that if your commit message is not clear, the correction will not be accepted. +5. Submit a pull request. + +Thank you for your contribution! 
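+
+As a worked example of steps 2 to 5 (a sketch only; the account and repository names below are placeholders, not real addresses):
+
+`git clone https://github.com/<your-account>/<this-repository>.git`
+
+`cd <this-repository>`
+
+`git checkout -b my_code_contribution`
+
+`git commit -am "describe the correction here"`
+
+`git push origin my_code_contribution`
+
+Then open a pull request from your branch against the Apress repository using the GitHub web interface.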
\ No newline at end of file diff --git a/package/chapter 10/pentaho/basic mapred job.kjb b/package/chapter 10/pentaho/basic mapred job.kjb new file mode 100644 index 0000000..1a2b459 --- /dev/null +++ b/package/chapter 10/pentaho/basic mapred job.kjb @@ -0,0 +1,157 @@ + + + basic mapred job + + + + 0 + / + - + 2014/10/04 15:08:07.682 + - + 2014/10/15 17:54:38.889 + + + + AgileBI + localhost + MONETDB + Native + pentaho-instaview + 50006 + monetdb + Encrypted 2be98afc86aa7f2e4cb14a17edb86abd8 + + + + true + + EXTRA_OPTION_INFOBRIGHT.characterEncodingUTF-8 + EXTRA_OPTION_MYSQL.defaultFetchSize500 + EXTRA_OPTION_MYSQL.useCursorFetchtrue + PORT_NUMBER50006 + PRESERVE_RESERVED_WORD_CASEY + SUPPORTS_BOOLEAN_DATA_TYPEY + SUPPORTS_TIMESTAMP_DATA_TYPEY + + + + hn1nnhn1nn80hadoopEncrypted 2be98afc82cce80899c18bc758fc0fc8eN + + + + + + + +ID_JOBYID_JOBCHANNEL_IDYCHANNEL_IDJOBNAMEYJOBNAMESTATUSYSTATUSLINES_READYLINES_READLINES_WRITTENYLINES_WRITTENLINES_UPDATEDYLINES_UPDATEDLINES_INPUTYLINES_INPUTLINES_OUTPUTYLINES_OUTPUTLINES_REJECTEDYLINES_REJECTEDERRORSYERRORSSTARTDATEYSTARTDATEENDDATEYENDDATELOGDATEYLOGDATEDEPDATEYDEPDATEREPLAYDATEYREPLAYDATELOG_FIELDYLOG_FIELDEXECUTING_SERVERNEXECUTING_SERVEREXECUTING_USERNEXECUTING_USERSTART_JOB_ENTRYNSTART_JOB_ENTRYCLIENTNCLIENT + + +
+ +ID_BATCHYID_BATCHCHANNEL_IDYCHANNEL_IDLOG_DATEYLOG_DATEJOBNAMEYTRANSNAMEJOBENTRYNAMEYSTEPNAMELINES_READYLINES_READLINES_WRITTENYLINES_WRITTENLINES_UPDATEDYLINES_UPDATEDLINES_INPUTYLINES_INPUTLINES_OUTPUTYLINES_OUTPUTLINES_REJECTEDYLINES_REJECTEDERRORSYERRORSRESULTYRESULTNR_RESULT_ROWSYNR_RESULT_ROWSNR_RESULT_FILESYNR_RESULT_FILESLOG_FIELDNLOG_FIELDCOPY_NRNCOPY_NR + + +
+ +ID_BATCHYID_BATCHCHANNEL_IDYCHANNEL_IDLOG_DATEYLOG_DATELOGGING_OBJECT_TYPEYLOGGING_OBJECT_TYPEOBJECT_NAMEYOBJECT_NAMEOBJECT_COPYYOBJECT_COPYREPOSITORY_DIRECTORYYREPOSITORY_DIRECTORYFILENAMEYFILENAMEOBJECT_IDYOBJECT_IDOBJECT_REVISIONYOBJECT_REVISIONPARENT_CHANNEL_IDYPARENT_CHANNEL_IDROOT_CHANNEL_IDYROOT_CHANNEL_ID + + +
+ + + + + + + +ID_JOB_RUNYID_JOB_RUNID_JOBYID_JOBJOBNAMEYJOBNAMENAMESPACEYNAMESPACECHECKPOINT_NAMEYCHECKPOINT_NAMECHECKPOINT_COPYNRYCHECKPOINT_COPYNRATTEMPT_NRYATTEMPT_NRJOB_RUN_START_DATEYJOB_RUN_START_DATELOGDATEYLOGDATERESULT_XMLYRESULT_XMLPARAMETER_XMLYPARAMETER_XML + N + + + + START + + SPECIAL + Y + N + N + 0 + 0 + 60 + 12 + 0 + 1 + 1 + N + Y + 0 + 35 + 34 + + + Pentaho MapReduce + + HadoopTransJobExecutorPlugin + pmr1 + + + + C:\pentaho\repository\mapper.ktr + + + + + Y + + + + C:\pentaho\repository\reducer.ktr + Y + MapReduce Input + MapReduce Output + + + MapReduce Input + MapReduce Output + N + 60 + /data/pentaho/rdbms + org.apache.hadoop.mapred.TextInputFormat + /data/pentaho/result + Y + N + N + N + N + org.apache.hadoop.mapred.TextOutputFormat + hc2nn + 8020 + hc2nn + 8032 + 1 + 1 + + + N + Y + 0 + 173 + 28 + + + + + START + Pentaho MapReduce + 0 + 0 + Y + Y + Y + + + + + + diff --git a/package/chapter 10/pentaho/mapper.ktr b/package/chapter 10/pentaho/mapper.ktr new file mode 100644 index 0000000..1d2350e --- /dev/null +++ b/package/chapter 10/pentaho/mapper.ktr @@ -0,0 +1,408 @@ + + + mapper + + + + Normal + 0 + / + + + + + +
+ + + +ID_BATCHYID_BATCHCHANNEL_IDYCHANNEL_IDTRANSNAMEYTRANSNAMESTATUSYSTATUSLINES_READYLINES_READLINES_WRITTENYLINES_WRITTENLINES_UPDATEDYLINES_UPDATEDLINES_INPUTYLINES_INPUTLINES_OUTPUTYLINES_OUTPUTLINES_REJECTEDYLINES_REJECTEDERRORSYERRORSSTARTDATEYSTARTDATEENDDATEYENDDATELOGDATEYLOGDATEDEPDATEYDEPDATEREPLAYDATEYREPLAYDATELOG_FIELDYLOG_FIELDEXECUTING_SERVERNEXECUTING_SERVEREXECUTING_USERNEXECUTING_USERCLIENTNCLIENT + + +
+ + +ID_BATCHYID_BATCHSEQ_NRYSEQ_NRLOGDATEYLOGDATETRANSNAMEYTRANSNAMESTEPNAMEYSTEPNAMESTEP_COPYYSTEP_COPYLINES_READYLINES_READLINES_WRITTENYLINES_WRITTENLINES_UPDATEDYLINES_UPDATEDLINES_INPUTYLINES_INPUTLINES_OUTPUTYLINES_OUTPUTLINES_REJECTEDYLINES_REJECTEDERRORSYERRORSINPUT_BUFFER_ROWSYINPUT_BUFFER_ROWSOUTPUT_BUFFER_ROWSYOUTPUT_BUFFER_ROWS + + +
+ +ID_BATCHYID_BATCHCHANNEL_IDYCHANNEL_IDLOG_DATEYLOG_DATELOGGING_OBJECT_TYPEYLOGGING_OBJECT_TYPEOBJECT_NAMEYOBJECT_NAMEOBJECT_COPYYOBJECT_COPYREPOSITORY_DIRECTORYYREPOSITORY_DIRECTORYFILENAMEYFILENAMEOBJECT_IDYOBJECT_IDOBJECT_REVISIONYOBJECT_REVISIONPARENT_CHANNEL_IDYPARENT_CHANNEL_IDROOT_CHANNEL_IDYROOT_CHANNEL_ID + + +
+ +ID_BATCHYID_BATCHCHANNEL_IDYCHANNEL_IDLOG_DATEYLOG_DATETRANSNAMEYTRANSNAMESTEPNAMEYSTEPNAMESTEP_COPYYSTEP_COPYLINES_READYLINES_READLINES_WRITTENYLINES_WRITTENLINES_UPDATEDYLINES_UPDATEDLINES_INPUTYLINES_INPUTLINES_OUTPUTYLINES_OUTPUTLINES_REJECTEDYLINES_REJECTEDERRORSYERRORSLOG_FIELDNLOG_FIELD + + +
+ +ID_BATCHYID_BATCHCHANNEL_IDYCHANNEL_IDLOG_DATEYLOG_DATEMETRICS_DATEYMETRICS_DATEMETRICS_CODEYMETRICS_CODEMETRICS_DESCRIPTIONYMETRICS_DESCRIPTIONMETRICS_SUBJECTYMETRICS_SUBJECTMETRICS_TYPEYMETRICS_TYPEMETRICS_VALUEYMETRICS_VALUE + + + +
+ + 0.0 + 0.0 + + 10000 + 50 + 50 + N + Y + 50000 + Y + + N + 1000 + 100 + + + + + + hn1nnhn1nn80hadoopEncrypted 2be98afc82cce80899c18bc758fc0fc8eN + + + + - + 2014/09/28 17:10:02.638 + - + 2014/10/08 19:52:44.082 + + + + + set key valueMapReduce OutputY + MapReduce InputSplit FieldsY + Split FieldsFilter rowsY + Filter rowsDummy (do nothing)Y + Filter rowsset key valueY + + + Dummy (do nothing) + Dummy + + Y + + 1 + + none + + + + + 321 + 306 + Y + + + + + Filter rows + FilterRows + + Y + + 1 + + none + + +set key value +Dummy (do nothing) + + + N + Field1 + IS NOT NULL + + + + + + 320 + 119 + Y + + + + + MapReduce Input + HadoopEnterPlugin + + Y + + 1 + + none + + + key + String + 0 + 2 + value + String + 0 + 2 + + + 53 + 118 + Y + + + + + MapReduce Output + HadoopExitPlugin + + N + + 1 + + none + + + comb_key + comb_value + + + 584 + 118 + Y + + + + + Split Fields + FieldSplitter + + Y + + 1 + + none + + + value + , + + Field1 + + N + String + + + + -1 + -1 + + + none + Field2 + + N + String + + + + -1 + -1 + + + none + Field3 + + N + String + + + + -1 + -1 + + + none + Field4 + + N + String + + + + -1 + -1 + + + none + Field5 + + N + String + + + + -1 + -1 + + + none + Field6 + + N + String + + + + -1 + -1 + + + none + Field7 + + N + String + + + + -1 + -1 + + + none + Field8 + + N + String + + + + -1 + -1 + + + none + Field9 + + N + String + + + + -1 + -1 + + + none + Field10 + + N + String + + + + -1 + -1 + + + none + Field11 + + N + String + + + + -1 + -1 + + + none + Field12 + + N + String + + + + -1 + -1 + + + none + Field13 + + N + String + + + + -1 + -1 + + + none + Field14 + + N + String + + + + -1 + -1 + + + none + + + 205 + 122 + Y + + + + + set key value + Janino + + Y + + 1 + + none + + + comb_key +Field2+'-' + Field3 +String +-1 +-1 + + + comb_value +1 +Integer +-1 +-1 + + + + + 446 + 116 + Y + + + + + + + + N + + diff --git a/package/chapter 10/pentaho/reducer.ktr b/package/chapter 10/pentaho/reducer.ktr new file mode 100644 index 0000000..d23667a --- /dev/null +++ b/package/chapter 10/pentaho/reducer.ktr @@ -0,0 +1,209 @@ + + + reducer + + + + Normal + 0 + / + + + + + +
+ + + +ID_BATCHYID_BATCHCHANNEL_IDYCHANNEL_IDTRANSNAMEYTRANSNAMESTATUSYSTATUSLINES_READYLINES_READLINES_WRITTENYLINES_WRITTENLINES_UPDATEDYLINES_UPDATEDLINES_INPUTYLINES_INPUTLINES_OUTPUTYLINES_OUTPUTLINES_REJECTEDYLINES_REJECTEDERRORSYERRORSSTARTDATEYSTARTDATEENDDATEYENDDATELOGDATEYLOGDATEDEPDATEYDEPDATEREPLAYDATEYREPLAYDATELOG_FIELDYLOG_FIELDEXECUTING_SERVERNEXECUTING_SERVEREXECUTING_USERNEXECUTING_USERCLIENTNCLIENT + + +
+ + +ID_BATCHYID_BATCHSEQ_NRYSEQ_NRLOGDATEYLOGDATETRANSNAMEYTRANSNAMESTEPNAMEYSTEPNAMESTEP_COPYYSTEP_COPYLINES_READYLINES_READLINES_WRITTENYLINES_WRITTENLINES_UPDATEDYLINES_UPDATEDLINES_INPUTYLINES_INPUTLINES_OUTPUTYLINES_OUTPUTLINES_REJECTEDYLINES_REJECTEDERRORSYERRORSINPUT_BUFFER_ROWSYINPUT_BUFFER_ROWSOUTPUT_BUFFER_ROWSYOUTPUT_BUFFER_ROWS + + +
+ +ID_BATCHYID_BATCHCHANNEL_IDYCHANNEL_IDLOG_DATEYLOG_DATELOGGING_OBJECT_TYPEYLOGGING_OBJECT_TYPEOBJECT_NAMEYOBJECT_NAMEOBJECT_COPYYOBJECT_COPYREPOSITORY_DIRECTORYYREPOSITORY_DIRECTORYFILENAMEYFILENAMEOBJECT_IDYOBJECT_IDOBJECT_REVISIONYOBJECT_REVISIONPARENT_CHANNEL_IDYPARENT_CHANNEL_IDROOT_CHANNEL_IDYROOT_CHANNEL_ID + + +
+ +ID_BATCHYID_BATCHCHANNEL_IDYCHANNEL_IDLOG_DATEYLOG_DATETRANSNAMEYTRANSNAMESTEPNAMEYSTEPNAMESTEP_COPYYSTEP_COPYLINES_READYLINES_READLINES_WRITTENYLINES_WRITTENLINES_UPDATEDYLINES_UPDATEDLINES_INPUTYLINES_INPUTLINES_OUTPUTYLINES_OUTPUTLINES_REJECTEDYLINES_REJECTEDERRORSYERRORSLOG_FIELDNLOG_FIELD + + +
+ +ID_BATCHYID_BATCHCHANNEL_IDYCHANNEL_IDLOG_DATEYLOG_DATEMETRICS_DATEYMETRICS_DATEMETRICS_CODEYMETRICS_CODEMETRICS_DESCRIPTIONYMETRICS_DESCRIPTIONMETRICS_SUBJECTYMETRICS_SUBJECTMETRICS_TYPEYMETRICS_TYPEMETRICS_VALUEYMETRICS_VALUE + + + +
+ + 0.0 + 0.0 + + 10000 + 50 + 50 + N + Y + 50000 + Y + + N + 1000 + 100 + + + + + + hn1nnhn1nn80hadoopEncrypted 2be98afc82cce80899c18bc758fc0fc8eN + + + + - + 2014/09/29 19:07:44.737 + - + 2014/10/09 19:04:40.425 + + + + + MapReduce InputSort rowsY + Sort rowsGroup byY + Group byMapReduce OutputY + + + Group by + GroupBy + + Y + + 1 + + none + + + N + N + + %%java.io.tmpdir%% + grp + N + + N + + + key + + + + + summed_val + value + SUM + + + + + + 379 + 62 + Y + + + + + MapReduce Input + HadoopEnterPlugin + + Y + + 1 + + none + + + key + String + 0 + 2 + value + Integer + 0 + 5 + + + 97 + 59 + Y + + + + + MapReduce Output + HadoopExitPlugin + + Y + + 1 + + none + + + key + summed_val + + + 538 + 62 + Y + + + + + Sort rows + SortRows + + Y + + 1 + + none + + + %%java.io.tmpdir%% + out + 10000 + + N + + N + + + key + Y + N + N + + + + + 251 + 59 + Y + + + + + + + + N + + diff --git a/package/chapter 10/talend/tmr1_0.1.item b/package/chapter 10/talend/tmr1_0.1.item new file mode 100644 index 0000000..bcfedb8 --- /dev/null +++ b/package/chapter 10/talend/tmr1_0.1.item @@ -0,0 +1,670 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/package/chapter 10/talend/tmr1_0.1.properties b/package/chapter 10/talend/tmr1_0.1.properties new file mode 100644 index 0000000..dedbafd --- /dev/null +++ b/package/chapter 10/talend/tmr1_0.1.properties @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/package/chapter 10/talend/tmr1_0.1.screenshot b/package/chapter 10/talend/tmr1_0.1.screenshot new file mode 100644 index 0000000..da176be --- /dev/null +++ b/package/chapter 10/talend/tmr1_0.1.screenshot @@ -0,0 +1,2 @@ + + diff --git a/package/chapter 11/hunk/README b/package/chapter 11/hunk/README new file mode 100644 index 0000000..9ee5890 --- /dev/null +++ b/package/chapter 11/hunk/README @@ -0,0 +1,6 @@ + +For details on configuration files, see $SPLUNK_HOME/etc/system/README/ directory +for specifications and examples. 
+ +These files were taken from the Splunk Hunk install on the CentOS 6.2 Linux +server hc2nn under /usr/local/hunk/etc/system/local diff --git a/package/chapter 11/hunk/indexes.conf b/package/chapter 11/hunk/indexes.conf new file mode 100644 index 0000000..5bfda20 --- /dev/null +++ b/package/chapter 11/hunk/indexes.conf @@ -0,0 +1,13 @@ +[provider:cdh5] +vix.family = hadoop +vix.command.arg.3 = $SPLUNK_HOME/bin/jars/SplunkMR-s6.0-hy2.0.jar +vix.env.HADOOP_HOME = /usr/lib/hadoop +vix.env.JAVA_HOME = /usr/lib/jvm/jre-1.6.0-openjdk.x86_64 +vix.fs.default.name = hdfs://hc2nn:8020 +vix.splunk.home.hdfs = /user/hadoop/hunk/workdir +vix.mapreduce.framework.name = yarn +vix.yarn.resourcemanager.address = hc2nn:8032 +vix.yarn.resourcemanager.scheduler.address = hc2nn:8030 +vix.mapred.job.map.memory.mb = 1024 +vix.yarn.app.mapreduce.am.staging-dir = /user +vix.splunk.search.recordreader.csv.regex = \.text$ diff --git a/package/chapter 11/hunk/inputs.conf b/package/chapter 11/hunk/inputs.conf new file mode 100644 index 0000000..9819713 --- /dev/null +++ b/package/chapter 11/hunk/inputs.conf @@ -0,0 +1,2 @@ +[default] +host = hc2nn diff --git a/package/chapter 11/hunk/limits.conf b/package/chapter 11/hunk/limits.conf new file mode 100644 index 0000000..d3988ca --- /dev/null +++ b/package/chapter 11/hunk/limits.conf @@ -0,0 +1,2 @@ +[inputproc] +file_tracking_db_threshold_mb = 500 diff --git a/package/chapter 11/hunk/migration.conf b/package/chapter 11/hunk/migration.conf new file mode 100644 index 0000000..8b3f5d9 --- /dev/null +++ b/package/chapter 11/hunk/migration.conf @@ -0,0 +1,2 @@ +[history] +migrated_cluster_app_to_underscore_cluster = true diff --git a/package/chapter 11/hunk/props.conf b/package/chapter 11/hunk/props.conf new file mode 100644 index 0000000..6dfab14 --- /dev/null +++ b/package/chapter 11/hunk/props.conf @@ -0,0 +1,2 @@ +[source::/data/hunk/rdbms/...] 
+REPORT-csvreport = extractcsv diff --git a/package/chapter 11/hunk/server.conf b/package/chapter 11/hunk/server.conf new file mode 100644 index 0000000..8a52726 --- /dev/null +++ b/package/chapter 11/hunk/server.conf @@ -0,0 +1,23 @@ +[general] +serverName = hc2nn + +[sslConfig] +sslKeysfilePassword = $1$clM2rf4q1aVj + +[lmpool:auto_generated_pool_download-trial] +description = auto_generated_pool_download-trial +quota = MAX +slaves = * +stack_id = download-trial + +[lmpool:auto_generated_pool_forwarder] +description = auto_generated_pool_forwarder +quota = MAX +slaves = * +stack_id = forwarder + +[lmpool:auto_generated_pool_free] +description = auto_generated_pool_free +quota = MAX +slaves = * +stack_id = free diff --git a/package/chapter 11/hunk/transforms.conf b/package/chapter 11/hunk/transforms.conf new file mode 100644 index 0000000..7c4096b --- /dev/null +++ b/package/chapter 11/hunk/transforms.conf @@ -0,0 +1,3 @@ +[extractcsv] +DELIMS="\," +FIELDS="year","manufacturer","model","class","engine size","cyclinders","transmission","Fuel Type","fuel_city_l_100km","fuel_hwy_l_100km","fuel_city_mpg","fuel_hwy_mpg","fuel_l_yr","c02_g_km" diff --git a/package/chapter 4/Perl/mapper.pl b/package/chapter 4/Perl/mapper.pl new file mode 100644 index 0000000..356dced --- /dev/null +++ b/package/chapter 4/Perl/mapper.pl @@ -0,0 +1,58 @@ +#!/usr/bin/perl + +my $line; +my @words = (); +my $word; + +# process input line by line + +foreach $line ( ) +{ + # strip new line from string + + chomp( $line ); + + # strip line into words using space + + @words = split( ' ', $line ); + + # now print the name value pairs + + foreach $word (@words) + { + # convert word to lower case + + $word = lc( $word ) ; + + # remove unwanted characters from string + + $word =~ s/!//g ; # remove ! character from word + $word =~ s/"//g ; # remove " character from word + $word =~ s/'//g ; # remove ' character from word + $word =~ s/_//g ; # remove _ character from word + $word =~ s/;//g ; # remove ; character from word + $word =~ s/\(//g ; # remove ( character from word + $word =~ s/\)//g ; # remove ) character from word + $word =~ s/\#//g ; # remove # character from word + $word =~ s/\$//g ; # remove $ character from word + $word =~ s/\&//g ; # remove & character from word + $word =~ s/\.//g ; # remove . character from word + $word =~ s/\,//g ; # remove , character from word + $word =~ s/\*//g ; # remove * character from word + $word =~ s/\-//g ; # remove - character from word + $word =~ s/\///g ; # remove / character from word + $word =~ s/\{//g ; # remove { character from word + $word =~ s/\}//g ; # remove } character from word + $word =~ s/\}//g ; # remove } character from word + + # only print the key,value pair if the key is not + # empty + + if ( $word ne "" ) + { + print "$word,1\n" ; + } + + } + +} diff --git a/package/chapter 4/Perl/reducer.pl b/package/chapter 4/Perl/reducer.pl new file mode 100644 index 0000000..8b7547b --- /dev/null +++ b/package/chapter 4/Perl/reducer.pl @@ -0,0 +1,49 @@ +#!/usr/bin/perl + +my $line; +my @lineparams = (); +my $oldword,$word,$value,$sumval; + +# the reducer is going to receive a key,value pair from stdin and it +# will need to sum up the values. It will need to split the name and +# value out of the comma separated string. 
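+#
+# note: Hadoop streaming sorts the mapper output by key before it reaches
+# this reducer, so all records for a given word arrive on consecutive lines
+# of stdin; that ordering is what lets the running total below work without
+# keeping a hash of counts per word.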
+ +$oldword = "" ; + +foreach $line ( ) +{ + # strip new line from string + + chomp( $line ); + + # split the line into the word and value + + @lineparams = split( '\,', $line ); + + $word = $lineparams[0]; + $value = $lineparams[1]; + + # Hadoop sorts the data by value so just sum similar word values + + if ( $word eq $oldword ) + { + $sumval += $value ; + } + else + { + if ( $oldword ne "" ) + { + print "$oldword,$sumval\n" ; + } + $sumval = 1 ; + } + + # now print the name value pairs + + $oldword = $word ; +} + +# remember to print last word + +print "$oldword,$sumval\n" ; + diff --git a/package/chapter 4/Perl/test1.sh b/package/chapter 4/Perl/test1.sh new file mode 100644 index 0000000..2cbf935 --- /dev/null +++ b/package/chapter 4/Perl/test1.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +# test the mapper + +echo "one one one two three" | ./mapper.pl + + diff --git a/package/chapter 4/Perl/test2.sh b/package/chapter 4/Perl/test2.sh new file mode 100644 index 0000000..eac9891 --- /dev/null +++ b/package/chapter 4/Perl/test2.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +# test the mapper + +echo "one one one two three" | ./mapper.pl | ./reducer.pl + diff --git a/package/chapter 4/Perl/wc_clean.sh b/package/chapter 4/Perl/wc_clean.sh new file mode 100644 index 0000000..673e264 --- /dev/null +++ b/package/chapter 4/Perl/wc_clean.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +# Clean the hadoop perl run data directory + +hadoop dfs -rmr /user/hadoop/perl/results_wc + diff --git a/package/chapter 4/Perl/wc_output.sh b/package/chapter 4/Perl/wc_output.sh new file mode 100644 index 0000000..dffd048 --- /dev/null +++ b/package/chapter 4/Perl/wc_output.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +# List the results directory + +hadoop dfs -ls /user/hadoop/perl/results_wc + +# Cat the first ten lines of the part file + + # hadoop dfs -cat /user/hadoop/perl/results_wc/part-00000 | head -10 +hadoop dfs -cat /user/hadoop/perl/results_wc/part-00000 | tail -10 + diff --git a/package/chapter 4/Perl/wordcount.sh b/package/chapter 4/Perl/wordcount.sh new file mode 100644 index 0000000..0ca97b9 --- /dev/null +++ b/package/chapter 4/Perl/wordcount.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +# Now run the Perl based word count + +cd $HADOOP_PREFIX + +hadoop jar contrib/streaming/hadoop-*streaming*.jar \ + -file /home/hadoop/perl/mapper.pl \ + -mapper /home/hadoop/perl/mapper.pl \ + -file /home/hadoop/perl/reducer.pl \ + -reducer /home/hadoop/perl/reducer.pl \ + -input /user/hadoop/edgar/* \ + -output /user/hadoop/perl/results_wc + diff --git a/package/chapter 4/hive/example 1.txt b/package/chapter 4/hive/example 1.txt new file mode 100644 index 0000000..57e97c0 --- /dev/null +++ b/package/chapter 4/hive/example 1.txt @@ -0,0 +1,31 @@ + +CREATE TABLE rawdata (line STRING); + + +LOAD DATA INPATH '/user/hadoop/edgar/' INTO TABLE rawdata ; + + +CREATE TABLE wordcount AS + SELECT + word, + count(1) AS count + FROM + (SELECT + EXPLODE(SPLIT(line,' ')) AS word + FROM + rawdata + ) words + GROUP BY word + ORDER BY word ; + + +SELECT + word, + count + FROM + wordcount + WHERE + count > 1500 + ORDER BY + count ; + diff --git a/package/chapter 4/java/patterns.txt b/package/chapter 4/java/patterns.txt new file mode 100644 index 0000000..7fec0d8 --- /dev/null +++ b/package/chapter 4/java/patterns.txt @@ -0,0 +1,17 @@ +! +" +' +_ +; +\( +\) +\# +\$ +\& +\. 
+\, +\* +\- +\/ +\{ +\} diff --git a/package/chapter 4/java/wc-ex1.java b/package/chapter 4/java/wc-ex1.java new file mode 100644 index 0000000..e35a218 --- /dev/null +++ b/package/chapter 4/java/wc-ex1.java @@ -0,0 +1,70 @@ +package org.myorg; + +import java.io.IOException; +import java.util.*; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.conf.*; +import org.apache.hadoop.io.*; +import org.apache.hadoop.mapred.*; +import org.apache.hadoop.util.*; + +public class WordCount +{ + + public static class Map extends MapReduceBase implements Mapper + { + private final static IntWritable one = new IntWritable(1); + private Text word = new Text(); + + public void map(LongWritable key, Text value, OutputCollector + output, Reporter reporter) throws IOException + { + String line = value.toString(); + StringTokenizer tokenizer = new StringTokenizer(line); + while (tokenizer.hasMoreTokens()) + { + word.set(tokenizer.nextToken()); + output.collect(word, one); + } + } + } + + public static class Reduce extends MapReduceBase implements Reducer + { + public void reduce(Text key, Iterator values, OutputCollector output, Reporter reporter) throws IOException + { + int sum = 0; + while (values.hasNext()) + { + sum += values.next().get(); + } + output.collect(key, new IntWritable(sum)); + } + } + + public static void main(String[] args) throws Exception + { + JobConf conf = new JobConf(WordCount.class); + conf.setJobName("wordcount"); + + conf.setOutputKeyClass(Text.class); + conf.setOutputValueClass(IntWritable.class); + + conf.setMapperClass(Map.class); + conf.setCombinerClass(Reduce.class); + conf.setReducerClass(Reduce.class); + + conf.setInputFormat(TextInputFormat.class); + conf.setOutputFormat(TextOutputFormat.class); + + FileInputFormat.setInputPaths(conf, new Path(args[0])); + FileOutputFormat.setOutputPath(conf, new Path(args[1])); + + JobClient.runJob(conf); + } + +} diff --git a/package/chapter 4/java/wc-ex2.java b/package/chapter 4/java/wc-ex2.java new file mode 100644 index 0000000..2720f07 --- /dev/null +++ b/package/chapter 4/java/wc-ex2.java @@ -0,0 +1,163 @@ +package org.myorg; + +import java.io.*; +import java.util.*; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.filecache.DistributedCache; +import org.apache.hadoop.conf.*; +import org.apache.hadoop.io.*; +import org.apache.hadoop.mapred.*; +import org.apache.hadoop.util.*; + +public class WordCount extends Configured implements Tool +{ + + /*-------------------------------------------------------------------------*/ + public static class Map extends MapReduceBase + implements Mapper < LongWritable, Text, Text, IntWritable > + { + + static enum Counters + { + INPUT_WORDS + } + + private final static IntWritable one = new IntWritable(1); + private Text word = new Text(); + + private boolean caseSensitive = true; + private Set < String > patternsToSkip = new HashSet < String > (); + + private long numRecords = 0; + private String inputFile; + + /*-------------------------------------------------------------------------*/ + public void configure(JobConf job) + { + caseSensitive = job.getBoolean("wordcount.case.sensitive", true); + inputFile = job.get("map.input.file"); + + if (job.getBoolean("wordcount.skip.patterns", false)) + { + Path[] patternsFiles = new Path[0]; + try + { + patternsFiles = DistributedCache.getLocalCacheFiles(job); + } + catch (IOException ioe) + { + System.err.println("Caught exception while getting cached files: " + + StringUtils.stringifyException(ioe)); + } + for (Path patternsFile: 
patternsFiles) + { + parseSkipFile(patternsFile); + } + } + } + /*-------------------------------------------------------------------------*/ + private void parseSkipFile(Path patternsFile) + { + try + { + BufferedReader fis = new BufferedReader(new FileReader(patternsFile.toString())); + String pattern = null; + while ((pattern = fis.readLine()) != null) + { + patternsToSkip.add(pattern); + } + } + catch (IOException ioe) + { + System.err.println("Caught exception while parsing the cached file '" + + patternsFile + "' : " + StringUtils.stringifyException(ioe)); + } + } + /*-------------------------------------------------------------------------*/ + public void map(LongWritable key, Text value, OutputCollector < Text, IntWritable > + output, Reporter reporter) throws IOException + { + String line = (caseSensitive) ? value.toString() : value.toString().toLowerCase(); + + for (String pattern: patternsToSkip) + { + line = line.replaceAll(pattern, ""); + } + + StringTokenizer tokenizer = new StringTokenizer(line); + while (tokenizer.hasMoreTokens()) + { + word.set(tokenizer.nextToken()); + output.collect(word, one); + reporter.incrCounter(Counters.INPUT_WORDS, 1); + } + + if ((++numRecords % 100) == 0) + { + reporter.setStatus("Finished processing " + numRecords + " records " + + "from the input file: " + inputFile); + } + } + + } /* class Map */ + + /*-------------------------------------------------------------------------*/ + public static class Reduce extends MapReduceBase implements Reducer < Text, + IntWritable, Text, IntWritable > + { + public void reduce(Text key, Iterator < IntWritable > values, OutputCollector + < Text, IntWritable > output, Reporter reporter) throws IOException + { + int sum = 0; + while (values.hasNext()) + { + sum += values.next().get(); + } + output.collect(key, new IntWritable(sum)); + } + } /* class Reduce */ + /*-------------------------------------------------------------------------*/ + public int run(String[] args) throws Exception + { + JobConf conf = new JobConf(getConf(), WordCount.class); + conf.setJobName("wordcount"); + + conf.setOutputKeyClass(Text.class); + conf.setOutputValueClass(IntWritable.class); + + conf.setMapperClass(Map.class); + conf.setCombinerClass(Reduce.class); + conf.setReducerClass(Reduce.class); + + conf.setInputFormat(TextInputFormat.class); + conf.setOutputFormat(TextOutputFormat.class); + + List < String > other_args = new ArrayList < String > (); + for (int i = 0; i < args.length; ++i) + { + if ("-skip".equals(args[i])) + { + DistributedCache.addCacheFile(new Path(args[++i]).toUri(), conf); + conf.setBoolean("wordcount.skip.patterns", true); + } + else + { + other_args.add(args[i]); + } + } + + FileInputFormat.setInputPaths(conf, new Path(other_args.get(0))); + FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1))); + + JobClient.runJob(conf); + return 0; + } + /*-------------------------------------------------------------------------*/ + public static void main(String[] args) throws Exception + { + int res = ToolRunner.run(new Configuration(), new WordCount(), args); + System.exit(res); + } + +} /* class word count*/ diff --git a/package/chapter 4/pig UDF/clean_wc.sh b/package/chapter 4/pig UDF/clean_wc.sh new file mode 100644 index 0000000..0acd85f --- /dev/null +++ b/package/chapter 4/pig UDF/clean_wc.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +# remove the pig script results directory + +hadoop dfs -rmr /user/hadoop/pig/wc_result1 diff --git a/package/chapter 4/pig UDF/result_wc.sh b/package/chapter 4/pig 
UDF/result_wc.sh new file mode 100644 index 0000000..d0473c4 --- /dev/null +++ b/package/chapter 4/pig UDF/result_wc.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +# remove the pig script results directory + +hadoop dfs -ls /user/hadoop/pig/wc_result1 + +echo "\n\n" + +hadoop dfs -cat /user/hadoop/pig/wc_result1/part-r-00000 | tail -10 diff --git a/package/chapter 4/pig UDF/run_wc1.sh b/package/chapter 4/pig UDF/run_wc1.sh new file mode 100644 index 0000000..6180112 --- /dev/null +++ b/package/chapter 4/pig UDF/run_wc1.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +# run the pig wc job + +./clean_wc.sh + +pig -stop_on_failure wordcount.pig + diff --git a/package/chapter 4/pig UDF/run_wc2.sh b/package/chapter 4/pig UDF/run_wc2.sh new file mode 100644 index 0000000..9714bd6 --- /dev/null +++ b/package/chapter 4/pig UDF/run_wc2.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +# run the pig wc 2 job + +./clean_wc.sh + +pig -stop_on_failure wordcount2.pig + diff --git a/package/chapter 4/pig UDF/wcudfs/CleanWS.java b/package/chapter 4/pig UDF/wcudfs/CleanWS.java new file mode 100644 index 0000000..3268adb --- /dev/null +++ b/package/chapter 4/pig UDF/wcudfs/CleanWS.java @@ -0,0 +1,33 @@ +package wcudfs; + +import java.io.*; + +import org.apache.pig.EvalFunc; +import org.apache.pig.data.Tuple; +import org.apache.hadoop.util.*; + +public class CleanWS extends EvalFunc +{ + /*--------------------------------------------------------*/ + @Override + public String exec(Tuple input) throws IOException + { + if (input == null || input.size() == 0) + return null; + try + { + String str = (String)input.get(0); + + return str.replaceAll("[^A-Za-z0-9]"," "); + } + catch(IOException ioe) + { + System.err.println("Caught exception processing input row : " + + StringUtils.stringifyException(ioe) ); + } + + return null; + } + /*--------------------------------------------------------*/ + +} /* class CleanWS */ diff --git a/package/chapter 4/pig UDF/wcudfs/build_clean_ws.sh b/package/chapter 4/pig UDF/wcudfs/build_clean_ws.sh new file mode 100644 index 0000000..2d5a5b7 --- /dev/null +++ b/package/chapter 4/pig UDF/wcudfs/build_clean_ws.sh @@ -0,0 +1,3 @@ + +javac -classpath $PIG_HOME/pig-0.12.1.jar \ + -Xlint:deprecation CleanWS.java diff --git a/package/chapter 4/pig UDF/wcudfs/build_lower.sh b/package/chapter 4/pig UDF/wcudfs/build_lower.sh new file mode 100644 index 0000000..074c5ee --- /dev/null +++ b/package/chapter 4/pig UDF/wcudfs/build_lower.sh @@ -0,0 +1,3 @@ + +javac -classpath $PIG_HOME/pig-0.12.1.jar \ + -Xlint:deprecation Lower.java diff --git a/package/chapter 4/pig UDF/wordcount2.pig b/package/chapter 4/pig UDF/wordcount2.pig new file mode 100644 index 0000000..58e8e21 --- /dev/null +++ b/package/chapter 4/pig UDF/wordcount2.pig @@ -0,0 +1,28 @@ +REGISTER /home/hadoop/pig/wcudfs.jar ; + +DEFINE Lower wcudfs.Lower() ; +DEFINE CleanWS wcudfs.CleanWS() ; + +-- get raw line data from file + +rlines = load '/user/hadoop/pig/10031.txt' AS (rline:chararray); + +-- filter for empty lines + +clines = FILTER rlines BY SIZE(rline) > 0 ; + +-- get list of words + +words = foreach clines generate flatten(TOKENIZE(CleanWS( (chararray) $0 ))) as word ; + +-- group the words by word value + +gword = group words by word ; + +-- create a word count + +wcount = foreach gword generate group, COUNT(words) ; + +-- store the word count + +store wcount into '/user/hadoop/pig/wc_result1' ; diff --git a/package/chapter 4/pig/wordcount.pig b/package/chapter 4/pig/wordcount.pig new file mode 100644 index 0000000..928a94e --- /dev/null +++ b/package/chapter 
4/pig/wordcount.pig @@ -0,0 +1,19 @@ +-- get raw line data from file + +rlines = load '/user/hadoop/pig/10031.txt'; + +-- get list of words + +words = foreach rlines generate flatten(TOKENIZE((chararray)$0)) as word ; + +-- group the words by word value + +gwords = group words by word ; + +-- create a word count + +wcount = foreach gwords generate group, COUNT(words) ; + +-- store the word count + +store wcount into '/user/hadoop/pig/wc_result1' ; diff --git a/package/chapter 5/oozie/coordinator.xml b/package/chapter 5/oozie/coordinator.xml new file mode 100644 index 0000000..c534d13 --- /dev/null +++ b/package/chapter 5/oozie/coordinator.xml @@ -0,0 +1,28 @@ + + + + + + ${hdfsRawData}/${YEAR}_${MONTH}_${DAY}_Fuel_Consumption + + + + + + ${oozieWfPath}/workflow.xml + + + + diff --git a/package/chapter 5/oozie/load.job.properties b/package/chapter 5/oozie/load.job.properties new file mode 100644 index 0000000..513fabc --- /dev/null +++ b/package/chapter 5/oozie/load.job.properties @@ -0,0 +1,36 @@ +# ---------------------------------------- +# Workflow job properties +# ---------------------------------------- + +nameNode=hdfs://hc1nn:8020 + +# Yarn resource manager host and port +jobTracker=hc1nn:8032 +queueName=high_pool + +oozie.libpath=${nameNode}/user/hadoop/share/lib +oozie.use.system.libpath=true +oozie.wf.rerun.failnodes=true + +hdfsUser=hadoop +wfProject=fuel +hdfsWfHome=${nameNode}/user/${hdfsUser}/oozie_wf/${wfProject} +hdfsRawData=${hdfsWfHome}/rawdata +hdfsEntityData=${hdfsWfHome}/entity + +oozie.wf.application.path=${hdfsWfHome}/pigwf +oozieWfPath=${hdfsWfHome}/pigwf/ + +# Job Coordination properties + +#jobStart=2014-07-10T12:00Z +#jobEnd=2014-09-10T12:00Z + +# Frequency in minutes + +#JobFreq=10080 +#jobNZTimeZone=GMT+1200 +#DataJobFreq=1440 + +# oozie.coord.application.path=${hdfsWfHome}/pigwf/ + diff --git a/package/chapter 5/oozie/manufacturer.pig b/package/chapter 5/oozie/manufacturer.pig new file mode 100644 index 0000000..7d18d3c --- /dev/null +++ b/package/chapter 5/oozie/manufacturer.pig @@ -0,0 +1,16 @@ + +-- get the raw data from the files from the csv files + +rlines = LOAD '/user/hadoop/oozie_wf/fuel/rawdata/*.csv' USING PigStorage(',') AS + ( year:int, manufacturer:chararray, model:chararray, class:chararray, size:float, cylinders:int, + transmission:chararray, fuel:chararray, cons_cityl100:float, cond_hwyl100:float, cons_citympgs:int, + cond_hwympgs:int, lyears:int, co2s:int + ); + +mlist = FOREACH rlines GENERATE manufacturer; + +dlist = DISTINCT mlist ; + +-- save to a new file + +STORE dlist INTO '/user/hadoop/oozie_wf/fuel/entity/manufacturer/' ; diff --git a/package/chapter 5/oozie/manufacturer.sql b/package/chapter 5/oozie/manufacturer.sql new file mode 100644 index 0000000..284280b --- /dev/null +++ b/package/chapter 5/oozie/manufacturer.sql @@ -0,0 +1,14 @@ + +drop table if exists rawdata2 ; + +create external table rawdata2 ( + line string +) +location '/user/hadoop/oozie_wf/fuel/entity/manufacturer/' ; + +drop table if exists manufacturer ; + +create table manufacturer as + select distinct line from rawdata2 where line not like '%=%' + and line not like '% % %' ; + diff --git a/package/chapter 5/oozie/model.pig b/package/chapter 5/oozie/model.pig new file mode 100644 index 0000000..17b6ece --- /dev/null +++ b/package/chapter 5/oozie/model.pig @@ -0,0 +1,15 @@ + +-- get the raw data from the files from the csv files + +rlines = LOAD '/user/hadoop/oozie_wf/fuel/rawdata/*.csv' USING PigStorage(',') AS + ( year:int, manufacturer:chararray, 
model:chararray, class:chararray, size:float, cylinders:int, + transmission:chararray, fuel:chararray, cons_cityl100:float, cond_hwyl100:float, cons_citympgs:int, + cond_hwympgs:int, lyears:int, co2s:int + ); + +mlist = FOREACH rlines GENERATE manufacturer,year,model ; + +dlist = DISTINCT mlist ; + +STORE dlist INTO '/user/hadoop/oozie_wf/fuel/entity/model/' using PigStorage(','); + diff --git a/package/chapter 5/oozie/model.sql b/package/chapter 5/oozie/model.sql new file mode 100644 index 0000000..152f3e2 --- /dev/null +++ b/package/chapter 5/oozie/model.sql @@ -0,0 +1,17 @@ + +drop table if exists rawdata2 ; + +create external table rawdata2 ( + line string +) +location '/user/hadoop/oozie_wf/fuel/entity/model/' ; + +drop table if exists model ; + +create table model as + select + distinct split(line,',') + from rawdata2 + where + line not like '%=%' ; + diff --git a/package/chapter 5/oozie/workflow.xml b/package/chapter 5/oozie/workflow.xml new file mode 100644 index 0000000..281f3f8 --- /dev/null +++ b/package/chapter 5/oozie/workflow.xml @@ -0,0 +1,95 @@ + + + + + + + + + + + + ${jobTracker} + ${nameNode} + + + + + + mapred.job.queue.name + ${queueName} + + + + + + + + + + + ${jobTracker} + ${nameNode} + + + + + + mapred.job.queue.name + ${queueName} + + + + + + + + + + + + + + + + + + ${jobTracker} + ${nameNode} + + + mapred.job.queue.name + ${queueName} + + + + + + + + + + + ${jobTracker} + ${nameNode} + + + mapred.job.queue.name + ${queueName} + + + + + + + + + + + + Workflow died, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + diff --git a/package/chapter 5/scheduler/capacity/v1/capacity-scheduler.xml b/package/chapter 5/scheduler/capacity/v1/capacity-scheduler.xml new file mode 100644 index 0000000..d434ea7 --- /dev/null +++ b/package/chapter 5/scheduler/capacity/v1/capacity-scheduler.xml @@ -0,0 +1,220 @@ + + + + + + + + + + + mapred.capacity-scheduler.maximum-system-jobs + 3000 + Maximum number of jobs in the system which can be initialized, + concurrently, by the CapacityScheduler. + + + + + mapred.capacity-scheduler.queue.default.capacity + 50 + Percentage of the number of slots in the cluster that are + to be available for jobs in this queue. + + + + + mapred.capacity-scheduler.queue.default.maximum-capacity + -1 + + maximum-capacity defines a limit beyond which a queue cannot use the capacity of the cluster. + This provides a means to limit how much excess capacity a queue can use. By default, there is no limit. + The maximum-capacity of a queue can only be greater than or equal to its minimum capacity. + Default value of -1 implies a queue can use complete capacity of the cluster. + + This property could be to curtail certain jobs which are long running in nature from occupying more than a + certain percentage of the cluster, which in the absence of pre-emption, could lead to capacity guarantees of + other queues being affected. + + One important thing to note is that maximum-capacity is a percentage , so based on the cluster's capacity + the max capacity would change. So if large no of nodes or racks get added to the cluster , max Capacity in + absolute terms would increase accordingly. + + + + + mapred.capacity-scheduler.queue.default.supports-priority + false + If true, priorities of jobs will be taken into + account in scheduling decisions. 
+ + + + + mapred.capacity-scheduler.queue.default.minimum-user-limit-percent + 100 + Each queue enforces a limit on the percentage of resources + allocated to a user at any given time, if there is competition for them. + This user limit can vary between a minimum and maximum value. The former + depends on the number of users who have submitted jobs, and the latter is + set to this property value. For example, suppose the value of this + property is 25. If two users have submitted jobs to a queue, no single + user can use more than 50% of the queue resources. If a third user submits + a job, no single user can use more than 33% of the queue resources. With 4 + or more users, no user can use more than 25% of the queue's resources. A + value of 100 implies no user limits are imposed. + + + + + mapred.capacity-scheduler.queue.default.user-limit-factor + 1 + The multiple of the queue capacity which can be configured to + allow a single user to acquire more slots. + + + + + mapred.capacity-scheduler.queue.default.maximum-initialized-active-tasks + 200000 + The maximum number of tasks, across all jobs in the queue, + which can be initialized concurrently. Once the queue's jobs exceed this + limit they will be queued on disk. + + + + + mapred.capacity-scheduler.queue.default.maximum-initialized-active-tasks-per-user + 100000 + The maximum number of tasks per-user, across all the of the + user's jobs in the queue, which can be initialized concurrently. Once the + user's jobs exceed this limit they will be queued on disk. + + + + + mapred.capacity-scheduler.queue.default.init-accept-jobs-factor + 10 + The multipe of (maximum-system-jobs * queue-capacity) used to + determine the number of jobs which are accepted by the scheduler. + + + + + + + + mapred.capacity-scheduler.default-supports-priority + false + If true, priorities of jobs will be taken into + account in scheduling decisions by default in a job queue. + + + + + mapred.capacity-scheduler.default-minimum-user-limit-percent + 100 + The percentage of the resources limited to a particular user + for the job queue at any given point of time by default. + + + + + + mapred.capacity-scheduler.default-user-limit-factor + 1 + The default multiple of queue-capacity which is used to + determine the amount of slots a single user can consume concurrently. + + + + + mapred.capacity-scheduler.default-maximum-active-tasks-per-queue + 200000 + The default maximum number of tasks, across all jobs in the + queue, which can be initialized concurrently. Once the queue's jobs exceed + this limit they will be queued on disk. + + + + + mapred.capacity-scheduler.default-maximum-active-tasks-per-user + 100000 + The default maximum number of tasks per-user, across all the of + the user's jobs in the queue, which can be initialized concurrently. Once + the user's jobs exceed this limit they will be queued on disk. + + + + + mapred.capacity-scheduler.default-init-accept-jobs-factor + 10 + The default multipe of (maximum-system-jobs * queue-capacity) + used to determine the number of jobs which are accepted by the scheduler. + + + + + + mapred.capacity-scheduler.init-poll-interval + 5000 + The amount of time in miliseconds which is used to poll + the job queues for jobs to initialize. + + + + mapred.capacity-scheduler.init-worker-threads + 5 + Number of worker threads which would be used by + Initialization poller to initialize jobs in a set of queue. 
+ If number mentioned in property is equal to number of job queues + then a single thread would initialize jobs in a queue. If lesser + then a thread would get a set of queues assigned. If the number + is greater then number of threads would be equal to number of + job queues. + + + + + + + mapred.capacity-scheduler.queue.tqueue.capacity + 50 + + + + mapred.capacity-scheduler.queue.tqueue.maximum-capacity + 100 + + + + mapred.capacity-scheduler.queue.tqueue.supports-priority + true + + + + mapred.capacity-scheduler.queue.tqueue.minimum-user-limit-percent + 20 + + + + mapred.capacity-scheduler.queue.tqueue.user-limit-factor + 1 + + + + mapred.capacity-scheduler.queue.tqueue.maximum-initialized-active-tasks + 200000 + + + + mapred.capacity-scheduler.queue.tqueue.maximum-initialized-active-tasks-per-user + 100000 + + + + mapred.capacity-scheduler.queue.tqueue.init-accept-jobs-factor + 10 + + + diff --git a/package/chapter 5/scheduler/capacity/v2/capacity-scheduler.xml b/package/chapter 5/scheduler/capacity/v2/capacity-scheduler.xml new file mode 100644 index 0000000..40be8e5 --- /dev/null +++ b/package/chapter 5/scheduler/capacity/v2/capacity-scheduler.xml @@ -0,0 +1,261 @@ + + + + yarn.scheduler.capacity.maximum-applications + 10000 + + Maximum number of applications that can be pending and running. + + + + + yarn.scheduler.capacity.maximum-am-resource-percent + 0.1 + + Maximum percent of resources in the cluster which can be used to run + application masters i.e. controls number of concurrent running + applications. + + + + + yarn.scheduler.capacity.resource-calculator + org.apache.hadoop.yarn.server.resourcemanager.resource.DefaultResourceCalculator + + The ResourceCalculator implementation to be used to compare + Resources in the scheduler. + The default i.e. DefaultResourceCalculator only uses Memory while + DominantResourceCalculator uses dominant-resource to compare + multi-dimensional resources such as Memory, CPU etc. 
+ + + + + + + yarn.scheduler.capacity.root.queues + client1,client2,client3 + + + + yarn.scheduler.capacity.root.client1.queues + client1a,client1b + + + + yarn.scheduler.capacity.root.client2.queues + client2a,client2b + + + + yarn.scheduler.capacity.root.client3.queues + client3a,client3b + + + + + + + + + + yarn.scheduler.capacity.root.client1.capacity 33 + + + yarn.scheduler.capacity.root.client1.user-limit-factor 1 + + + yarn.scheduler.capacity.root.client1.maximum-capacity 100 + + + yarn.scheduler.capacity.root.client1.state RUNNING + + + yarn.scheduler.capacity.root.client1.acl_submit_applications * + + + yarn.scheduler.capacity.root.client1.acl_administer_queue * + + + + + + yarn.scheduler.capacity.root.client1.client1a.capacity 50 + + + yarn.scheduler.capacity.root.client1.client1a.user-limit-factor 1 + + + yarn.scheduler.capacity.root.client1.client1a.maximum-capacity 50 + + + yarn.scheduler.capacity.root.client1.client1a.state RUNNING + + + yarn.scheduler.capacity.root.client1.client1a.acl_submit_applications * + + + yarn.scheduler.capacity.root.client1.client1a.acl_administer_queue * + + + + + + yarn.scheduler.capacity.root.client1.client1b.capacity 50 + + + yarn.scheduler.capacity.root.client1.client1b.user-limit-factor 1 + + + yarn.scheduler.capacity.root.client1.client1b.maximum-capacity 50 + + + yarn.scheduler.capacity.root.client1.client1b.state RUNNING + + + yarn.scheduler.capacity.root.client1.client1b.acl_submit_applications * + + + yarn.scheduler.capacity.root.client1.client1b.acl_administer_queue * + + + + + + yarn.scheduler.capacity.root.client2.capacity 33 + + + yarn.scheduler.capacity.root.client2.user-limit-factor 1 + + + yarn.scheduler.capacity.root.client2.maximum-capacity 100 + + + yarn.scheduler.capacity.root.client2.state RUNNING + + + yarn.scheduler.capacity.root.client2.acl_submit_applications * + + + yarn.scheduler.capacity.root.client2.acl_administer_queue * + + + + + + yarn.scheduler.capacity.root.client2.client2a.capacity 50 + + + yarn.scheduler.capacity.root.client2.client2a.user-limit-factor 1 + + + yarn.scheduler.capacity.root.client2.client2a.maximum-capacity 50 + + + yarn.scheduler.capacity.root.client2.client2a.state RUNNING + + + yarn.scheduler.capacity.root.client2.client2a.acl_submit_applications * + + + yarn.scheduler.capacity.root.client2.client2a.acl_administer_queue * + + + + + + yarn.scheduler.capacity.root.client2.client2b.capacity 50 + + + yarn.scheduler.capacity.root.client2.client2b.user-limit-factor 1 + + + yarn.scheduler.capacity.root.client2.client2b.maximum-capacity 50 + + + yarn.scheduler.capacity.root.client2.client2b.state RUNNING + + + yarn.scheduler.capacity.root.client2.client2b.acl_submit_applications * + + + yarn.scheduler.capacity.root.client2.client2b.acl_administer_queue * + + + + + + yarn.scheduler.capacity.root.client3.capacity 34 + + + yarn.scheduler.capacity.root.client3.user-limit-factor 1 + + + yarn.scheduler.capacity.root.client3.maximum-capacity 100 + + + yarn.scheduler.capacity.root.client3.state RUNNING + + + yarn.scheduler.capacity.root.client3.acl_submit_applications * + + + yarn.scheduler.capacity.root.client3.acl_administer_queue * + + + + + + yarn.scheduler.capacity.root.client3.client3a.capacity 50 + + + yarn.scheduler.capacity.root.client3.client3a.user-limit-factor 1 + + + yarn.scheduler.capacity.root.client3.client3a.maximum-capacity 50 + + + yarn.scheduler.capacity.root.client3.client3a.state RUNNING + + + yarn.scheduler.capacity.root.client3.client3a.acl_submit_applications * + + + 
yarn.scheduler.capacity.root.client3.client3a.acl_administer_queue * + + + + + + yarn.scheduler.capacity.root.client3.client3b.capacity 50 + + + yarn.scheduler.capacity.root.client3.client3b.user-limit-factor 1 + + + yarn.scheduler.capacity.root.client3.client3b.maximum-capacity 50 + + + yarn.scheduler.capacity.root.client3.client3b.state RUNNING + + + yarn.scheduler.capacity.root.client3.client3b.acl_submit_applications * + + + yarn.scheduler.capacity.root.client3.client3b.acl_administer_queue * + + + + + + yarn.scheduler.capacity.node-locality-delay + -1 + + Number of missed scheduling opportunities after which the CapacityScheduler + attempts to schedule rack-local containers. + Typically this should be set to number of racks in the cluster, this + feature is disabled by default, set to -1. + + + + diff --git a/package/chapter 5/scheduler/fair/v1/fair-scheduler.xml b/package/chapter 5/scheduler/fair/v1/fair-scheduler.xml new file mode 100644 index 0000000..d8814c6 --- /dev/null +++ b/package/chapter 5/scheduler/fair/v1/fair-scheduler.xml @@ -0,0 +1,39 @@ + + + + + + + + 10 + 10 + 50 + 50 + 1000 + 3 + + + + 10 + 10 + 50 + 50 + 1000 + 1 + + + + 10 + 10 + 50 + 50 + 1000 + 1 + + + diff --git a/package/chapter 5/scheduler/fair/v2/fair-scheduler.xml b/package/chapter 5/scheduler/fair/v2/fair-scheduler.xml new file mode 100644 index 0000000..d8814c6 --- /dev/null +++ b/package/chapter 5/scheduler/fair/v2/fair-scheduler.xml @@ -0,0 +1,39 @@ + + + + + + + + 10 + 10 + 50 + 50 + 1000 + 3 + + + + 10 + 10 + 50 + 50 + 1000 + 1 + + + + 10 + 10 + 50 + 50 + 1000 + 1 + + + diff --git a/package/chapter 6/flume/agent1.cfg b/package/chapter 6/flume/agent1.cfg new file mode 100644 index 0000000..87d6b9f --- /dev/null +++ b/package/chapter 6/flume/agent1.cfg @@ -0,0 +1,35 @@ +# ---------------------------------------------------------------------- +# define agent src, channel and sink +# ---------------------------------------------------------------------- + +agent1.sources = source1 +agent1.channels = channel1 +agent1.sinks = sink1 + +# ---------------------------------------------------------------------- +# define agent channel +# ---------------------------------------------------------------------- + +agent1.channels.channel1.type = FILE +agent1.channels.channel1.capacity = 2000000 +agent1.channels.channel1.checkpointInterval = 60000 +agent1.channels.channel1.maxFileSize = 10737418240 + +# ---------------------------------------------------------------------- +# define agent source +# ---------------------------------------------------------------------- + +agent1.sources.source1.type = exec +agent1.sources.source1.command = tail -F /var/log/messages +agent1.sources.source1.channels = channel1 + +# ---------------------------------------------------------------------- +# define agent sink +# ---------------------------------------------------------------------- + +agent1.sinks.sink1.type = hdfs +agent1.sinks.sink1.hdfs.path = hdfs://hc1nn/flume/messages +agent1.sinks.sink1.hdfs.rollInterval = 0 +agent1.sinks.sink1.hdfs.rollSize = 1000000 +agent1.sinks.sink1.hdfs.batchSize = 100 +agent1.sinks.sink1.channel = channel1 diff --git a/package/chapter 6/flume/flume_clean_hdfs.sh b/package/chapter 6/flume/flume_clean_hdfs.sh new file mode 100644 index 0000000..c38bf4a --- /dev/null +++ b/package/chapter 6/flume/flume_clean_hdfs.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +hdfs dfs -rm -r /flume/messages diff --git a/package/chapter 6/flume/flume_exec.sh b/package/chapter 6/flume/flume_exec.sh new file mode 100644 index 
0000000..9b5877d --- /dev/null +++ b/package/chapter 6/flume/flume_exec.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +# run the bash agent + +flume-ng agent \ + --conf /etc/flume-ng/conf \ + --conf-file agent1.cfg \ + -Dflume.root.logger=DEBUG,INFO,console \ + -name agent1 diff --git a/package/chapter 6/flume/flume_show_hdfs.sh b/package/chapter 6/flume/flume_show_hdfs.sh new file mode 100644 index 0000000..39fed1a --- /dev/null +++ b/package/chapter 6/flume/flume_show_hdfs.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +hdfs dfs -ls /flume/messages diff --git a/package/chapter 6/sqoop/hive-import.txt b/package/chapter 6/sqoop/hive-import.txt new file mode 100644 index 0000000..eca2f8c --- /dev/null +++ b/package/chapter 6/sqoop/hive-import.txt @@ -0,0 +1,10 @@ +import +--connect +jdbc:mysql://hc1nn/sqoop +--username +sqoop +--password +FirmWare1234 +--table +rawdata +--hive-import diff --git a/package/chapter 6/sqoop/import.txt b/package/chapter 6/sqoop/import.txt new file mode 100644 index 0000000..9fd0bac --- /dev/null +++ b/package/chapter 6/sqoop/import.txt @@ -0,0 +1,10 @@ + +import +--connect +jdbc:mysql://hc1nn/sqoop +--username +sqoop +--password +FirmWare1234 +--table +rawdata diff --git a/package/chapter 7/ganglia/gmetad.conf b/package/chapter 7/ganglia/gmetad.conf new file mode 100644 index 0000000..0e3079e --- /dev/null +++ b/package/chapter 7/ganglia/gmetad.conf @@ -0,0 +1,125 @@ +# This is an example of a Ganglia Meta Daemon configuration file +# http://ganglia.sourceforge.net/ +# +# $Id: gmetad.conf.in 2014 2009-08-10 10:44:09Z d_pocock $ +# +#------------------------------------------------------------------------------- +# Setting the debug_level to 1 will keep daemon in the forground and +# show only error messages. Setting this value higher than 1 will make +# gmetad output debugging information and stay in the foreground. +# default: 0 +# debug_level 10 +# +#------------------------------------------------------------------------------- +# What to monitor. The most important section of this file. +# +# The data_source tag specifies either a cluster or a grid to +# monitor. If we detect the source is a cluster, we will maintain a complete +# set of RRD databases for it, which can be used to create historical +# graphs of the metrics. If the source is a grid (it comes from another gmetad), +# we will only maintain summary RRDs for it. +# +# Format: +# data_source "my cluster" [polling interval] address1:port addreses2:port ... +# +# The keyword 'data_source' must immediately be followed by a unique +# string which identifies the source, then an optional polling interval in +# seconds. The source will be polled at this interval on average. +# If the polling interval is omitted, 15sec is asssumed. +# +# A list of machines which service the data source follows, in the +# format ip:port, or name:port. If a port is not specified then 8649 +# (the default gmond port) is assumed. 
+# default: There is no default value +# +# data_source "my cluster" 10 localhost my.machine.edu:8649 1.2.3.5:8655 +# data_source "my grid" 50 1.3.4.7:8655 grid.org:8651 grid-backup.org:8651 +# data_source "another source" 1.3.4.7:8655 1.3.4.8 + +data_source "my cluster" 30 hc1nn + +# +# Round-Robin Archives +# You can specify custom Round-Robin archives here (defaults are listed below) +# +# RRAs "RRA:AVERAGE:0.5:1:244" "RRA:AVERAGE:0.5:24:244" "RRA:AVERAGE:0.5:168:244" "RRA:AVERAGE:0.5:672:244" \ +# "RRA:AVERAGE:0.5:5760:374" +# + +# +#------------------------------------------------------------------------------- +# Scalability mode. If on, we summarize over downstream grids, and respect +# authority tags. If off, we take on 2.5.0-era behavior: we do not wrap our output +# in tags, we ignore all tags we see, and always assume +# we are the "authority" on data source feeds. This approach does not scale to +# large groups of clusters, but is provided for backwards compatibility. +# default: on +# scalable off +# +#------------------------------------------------------------------------------- +# The name of this Grid. All the data sources above will be wrapped in a GRID +# tag with this name. +# default: unspecified +# gridname "MyGrid" +# +#------------------------------------------------------------------------------- +# The authority URL for this grid. Used by other gmetads to locate graphs +# for our data sources. Generally points to a ganglia/ +# website on this machine. +# default: "http://hostname/ganglia/", +# where hostname is the name of this machine, as defined by gethostname(). +# authority "http://mycluster.org/newprefix/" +# +#------------------------------------------------------------------------------- +# List of machines this gmetad will share XML with. Localhost +# is always trusted. +# default: There is no default value +# trusted_hosts 127.0.0.1 169.229.50.165 my.gmetad.org +# +#------------------------------------------------------------------------------- +# If you want any host which connects to the gmetad XML to receive +# data, then set this value to "on" +# default: off +# all_trusted on +# +#------------------------------------------------------------------------------- +# If you don't want gmetad to setuid then set this to off +# default: on +# setuid off +# +#------------------------------------------------------------------------------- +# User gmetad will setuid to (defaults to "ganglia") +# default: "ganglia" +# setuid_username "ganglia" +# +#------------------------------------------------------------------------------- +# The port gmetad will answer requests for XML +# default: 8651 +# xml_port 8651 +# +#------------------------------------------------------------------------------- +# The port gmetad will answer queries for XML. This facility allows +# simple subtree and summation views of the XML tree. 
+# default: 8652 +# interactive_port 8652 +# +#------------------------------------------------------------------------------- +# The number of threads answering XML requests +# default: 4 +# server_threads 10 +# +#------------------------------------------------------------------------------- +# Where gmetad stores its round-robin databases +# default: "/var/lib/ganglia/rrds" +# rrd_rootdir "/some/other/place" +# +#------------------------------------------------------------------------------- +# In earlier versions of gmetad, hostnames were handled in a case +# sensitive manner +# If your hostname directories have been renamed to lower case, +# set this option to 0 to disable backward compatibility. +# From version 3.2, backwards compatibility will be disabled by default. +# default: 1 (for gmetad < 3.2) +# default: 0 (for gmetad >= 3.2) +case_sensitive_hostnames 1 + diff --git a/package/chapter 7/ganglia/gmond.conf b/package/chapter 7/ganglia/gmond.conf new file mode 100644 index 0000000..bc3f87d --- /dev/null +++ b/package/chapter 7/ganglia/gmond.conf @@ -0,0 +1,340 @@ +/* This configuration is as close to 2.5.x default behavior as possible + The values closely match ./gmond/metric.h definitions in 2.5.x */ +globals { + daemonize = yes + setuid = yes + user = ganglia + debug_level = 0 + max_udp_msg_len = 1472 + mute = no + deaf = no + allow_extra_data = yes + host_dmax = 0 /*secs */ + cleanup_threshold = 300 /*secs */ + gexec = no + send_metadata_interval = 0 /*secs */ +} + +/* + * The cluster attributes specified will be used as part of the + * tag that will wrap all hosts collected by this instance. + */ +cluster { + name = "my cluster" + owner = "unspecified" + latlong = "unspecified" + url = "unspecified" +} + +/* The host section describes attributes of the host, like the location */ +host { + location = "unspecified" +} + +/* Feel free to specify as many udp_send_channels as you like. Gmond + used to only support having a single channel */ +udp_send_channel { + host = 192.168.1.107 + port = 8649 + ttl = 1 +} + +/* You can specify as many udp_recv_channels as you like as well. */ +udp_recv_channel { + port = 8649 +} + +/* You can specify as many tcp_accept_channels as you like to share + an xml description of the state of the cluster */ +tcp_accept_channel { + port = 8649 +} + +/* Each metrics module that is referenced by gmond must be specified and + loaded. If the module has been statically linked with gmond, it does + not require a load path. However all dynamically loadable modules must + include a load path. */ +modules { + module { + name = "core_metrics" + } + module { + name = "cpu_module" + path = "modcpu.so" + } + module { + name = "disk_module" + path = "moddisk.so" + } + module { + name = "load_module" + path = "modload.so" + } + module { + name = "mem_module" + path = "modmem.so" + } + module { + name = "net_module" + path = "modnet.so" + } + module { + name = "proc_module" + path = "modproc.so" + } + module { + name = "sys_module" + path = "modsys.so" + } +} + +include ('/etc/ganglia/conf.d/*.conf') + +/* The old internal 2.5.x metric array has been replaced by the following + collection_group directives. What follows is the default behavior for + collecting and sending metrics that is as close to 2.5.x behavior as + possible. */ + +/* This collection group will cause a heartbeat (or beacon) to be sent every + 20 seconds. In the heartbeat is the GMOND_STARTED data which expresses + the age of the running gmond. 
*/ +collection_group { + collect_once = yes + time_threshold = 20 + metric { + name = "heartbeat" + } +} + +/* This collection group will send general info about this host every + 1200 secs. + This information doesn't change between reboots and is only collected + once. */ +collection_group { + collect_once = yes + time_threshold = 1200 + metric { + name = "cpu_num" + title = "CPU Count" + } + metric { + name = "cpu_speed" + title = "CPU Speed" + } + metric { + name = "mem_total" + title = "Memory Total" + } + /* Should this be here? Swap can be added/removed between reboots. */ + metric { + name = "swap_total" + title = "Swap Space Total" + } + metric { + name = "boottime" + title = "Last Boot Time" + } + metric { + name = "machine_type" + title = "Machine Type" + } + metric { + name = "os_name" + title = "Operating System" + } + metric { + name = "os_release" + title = "Operating System Release" + } + metric { + name = "location" + title = "Location" + } +} + +/* This collection group will send the status of gexecd for this host + every 300 secs.*/ +/* Unlike 2.5.x the default behavior is to report gexecd OFF. */ +collection_group { + collect_once = yes + time_threshold = 300 + metric { + name = "gexec" + title = "Gexec Status" + } +} + +/* This collection group will collect the CPU status info every 20 secs. + The time threshold is set to 90 seconds. In honesty, this + time_threshold could be set significantly higher to reduce + unneccessary network chatter. */ +collection_group { + collect_every = 20 + time_threshold = 90 + /* CPU status */ + metric { + name = "cpu_user" + value_threshold = "1.0" + title = "CPU User" + } + metric { + name = "cpu_system" + value_threshold = "1.0" + title = "CPU System" + } + metric { + name = "cpu_idle" + value_threshold = "5.0" + title = "CPU Idle" + } + metric { + name = "cpu_nice" + value_threshold = "1.0" + title = "CPU Nice" + } + metric { + name = "cpu_aidle" + value_threshold = "5.0" + title = "CPU aidle" + } + metric { + name = "cpu_wio" + value_threshold = "1.0" + title = "CPU wio" + } + /* The next two metrics are optional if you want more detail... + ... since they are accounted for in cpu_system. + metric { + name = "cpu_intr" + value_threshold = "1.0" + title = "CPU intr" + } + metric { + name = "cpu_sintr" + value_threshold = "1.0" + title = "CPU sintr" + } + */ +} + +collection_group { + collect_every = 20 + time_threshold = 90 + /* Load Averages */ + metric { + name = "load_one" + value_threshold = "1.0" + title = "One Minute Load Average" + } + metric { + name = "load_five" + value_threshold = "1.0" + title = "Five Minute Load Average" + } + metric { + name = "load_fifteen" + value_threshold = "1.0" + title = "Fifteen Minute Load Average" + } +} + +/* This group collects the number of running and total processes */ +collection_group { + collect_every = 80 + time_threshold = 950 + metric { + name = "proc_run" + value_threshold = "1.0" + title = "Total Running Processes" + } + metric { + name = "proc_total" + value_threshold = "1.0" + title = "Total Processes" + } +} + +/* This collection group grabs the volatile memory metrics every 40 secs and + sends them at least every 180 secs. This time_threshold can be increased + significantly to reduce unneeded network traffic. 
*/ +collection_group { + collect_every = 40 + time_threshold = 180 + metric { + name = "mem_free" + value_threshold = "1024.0" + title = "Free Memory" + } + metric { + name = "mem_shared" + value_threshold = "1024.0" + title = "Shared Memory" + } + metric { + name = "mem_buffers" + value_threshold = "1024.0" + title = "Memory Buffers" + } + metric { + name = "mem_cached" + value_threshold = "1024.0" + title = "Cached Memory" + } + metric { + name = "swap_free" + value_threshold = "1024.0" + title = "Free Swap Space" + } +} + +collection_group { + collect_every = 40 + time_threshold = 300 + metric { + name = "bytes_out" + value_threshold = 4096 + title = "Bytes Sent" + } + metric { + name = "bytes_in" + value_threshold = 4096 + title = "Bytes Received" + } + metric { + name = "pkts_in" + value_threshold = 256 + title = "Packets Received" + } + metric { + name = "pkts_out" + value_threshold = 256 + title = "Packets Sent" + } +} + +/* Different than 2.5.x default since the old config made no sense */ +collection_group { + collect_every = 1800 + time_threshold = 3600 + metric { + name = "disk_total" + value_threshold = 1.0 + title = "Total Disk Space" + } +} + +collection_group { + collect_every = 40 + time_threshold = 180 + metric { + name = "disk_free" + value_threshold = 1.0 + title = "Disk Space Available" + } + metric { + name = "part_max_used" + value_threshold = 1.0 + title = "Maximum Disk Space Used" + } +} + diff --git a/package/chapter 7/hue/hue.ini b/package/chapter 7/hue/hue.ini new file mode 100644 index 0000000..a7e47a2 --- /dev/null +++ b/package/chapter 7/hue/hue.ini @@ -0,0 +1,726 @@ +# Hue configuration file +# =================================== +# +# For complete documentation about the contents of this file, run +# $ /build/env/bin/hue config_help +# +# All .ini files under the current directory are treated equally. Their +# contents are merged to form the Hue configuration, which can +# can be viewed on the Hue at +# http://:/dump_config + + +########################################################################### +# General configuration for core Desktop features (authentication, etc) +########################################################################### + +[desktop] + + # Set this to a random string, the longer the better. + # This is used for secure hashing in the session store. + secret_key=kdntwdfjgmxnsprngpwekspfnsmdpwtyiubkdn + + # Webserver listens on this address and port + http_host=hc1nn + http_port=8888 + + # Time zone name + time_zone=America/Los_Angeles + + # Turn off debug + django_debug_mode=0 + + # Turn off backtrace for server error + http_500_debug_mode=0 + + # Server email for internal error messages + ## django_server_email='hue@localhost.localdomain' + + # Email backend + ## django_email_backend=django.core.mail.backends.smtp.EmailBackend + + # Set to true to use CherryPy as the webserver, set to false + # to use Spawning as the webserver. Defaults to Spawning if + # key is not specified. + ## use_cherrypy_server = false + ## use_cherrypy_server = true + + # Webserver runs as this user + ## server_user=hue + ## server_group=hue + + # If set to false, runcpserver will not actually start the web server. + # Used if Apache is being used as a WSGI container. 
+ ## enable_server=yes + + # Number of threads used by the CherryPy web server + ## cherrypy_server_threads=10 + + # Filename of SSL Certificate + ## ssl_certificate= + + # Filename of SSL RSA Private Key + ## ssl_private_key= + + # List of allowed and disallowed ciphers + ## ssl_cipher_list=DEFAULT:!aNULL:!eNULL:!LOW:!EXPORT:!SSLv2 + + # Default encoding for site data + ## default_site_encoding=utf-8 + + # Help improve Hue with anonymous usage analytics. + # Use Google Analytics to see how many times an application or specific section of an application is used, nothing more. + ## collect_usage=true + + ## Comma-separated list of regular expressions, which match the redirect URL. + ## For example, to restrict to your local domain and FQDN, the following value can be used: + ## ^\/.*$,^http:\/\/www.mydomain.com\/.*$ + # redirect_whitelist= + + # Administrators + # ---------------- + [[django_admins]] + ## [[[admin1]]] + ## name=john + ## email=john@doe.com + + # UI customizations + # ------------------- + [[custom]] + + # Top banner HTML code + ## banner_top_html= + + # Configuration options for user authentication into the web application + # ------------------------------------------------------------------------ + [[auth]] + + # Authentication backend. Common settings are: + # - django.contrib.auth.backends.ModelBackend (entirely Django backend) + # - desktop.auth.backend.AllowAllBackend (allows everyone) + # - desktop.auth.backend.AllowFirstUserDjangoBackend + # (Default. Relies on Django and user manager, after the first login) + # - desktop.auth.backend.LdapBackend + # - desktop.auth.backend.PamBackend + # - desktop.auth.backend.SpnegoDjangoBackend + # - desktop.auth.backend.RemoteUserDjangoBackend + # - desktop.auth.backend.OAuthBackend + # - libsaml.backend.SAML2Backend + ## backend=desktop.auth.backend.AllowFirstUserDjangoBackend + + # Backend to synchronize user-group membership with + ## user_group_membership_synchronization_backend=desktop.auth.backend.LdapSynchronizationBackend + + ## pam_service=login + + # When using the desktop.auth.backend.RemoteUserDjangoBackend, this sets + # the normalized name of the header that contains the remote user. + # The HTTP header in the request is converted to a key by converting + # all characters to uppercase, replacing any hyphens with underscores + # and adding an HTTP_ prefix to the name. So, for example, if the header + # is called Remote-User that would be configured as HTTP_REMOTE_USER + # + # Defaults to HTTP_REMOTE_USER + ## remote_user_header=HTTP_REMOTE_USER + + # Ignore the case of usernames when searching for existing users. + # Only supported in remoteUserDjangoBackend. + ## ignore_username_case=false + + # Ignore the case of usernames when searching for existing users to authenticate with. + # Only supported in remoteUserDjangoBackend. + ## force_username_lowercase=false + + # Configuration options for connecting to LDAP and Active Directory + # ------------------------------------------------------------------- + [[ldap]] + + # The search base for finding users and groups + ## base_dn="DC=mycompany,DC=com" + + # The NT domain to connect to (only for use with Active Directory) + ## nt_domain=mycompany.com + + # URL of the LDAP server + ## ldap_url=ldap://auth.mycompany.com + + # A PEM-format file containing certificates for the CA's that + # Hue will trust for authentication over TLS. + # The certificate for the CA that signed the + # LDAP server certificate must be included among these certificates. 
+ # See more here http://www.openldap.org/doc/admin24/tls.html. + ## ldap_cert= + ## use_start_tls=true + + # Distinguished name of the user to bind as -- not necessary if the LDAP server + # supports anonymous searches + ## bind_dn="CN=ServiceAccount,DC=mycompany,DC=com" + + # Password of the bind user -- not necessary if the LDAP server supports + # anonymous searches + ## bind_password= + + # Pattern for searching for usernames -- Use for the parameter + # For use when using LdapBackend for Hue authentication + ## ldap_username_pattern="uid=,ou=People,dc=mycompany,dc=com" + + # Create users in Hue when they try to login with their LDAP credentials + # For use when using LdapBackend for Hue authentication + ## create_users_on_login = true + + # Ignore the case of usernames when searching for existing users in Hue. + ## ignore_username_case=false + + # Force usernames to lowercase when creating new users from LDAP. + ## force_username_lowercase=false + + # Use search bind authentication. + ## search_bind_authentication=true + + # Choose which kind of subgrouping to use: nested or suboordinate (deprecated). + ## subgroups=suboordinate + + # Define the number of levels to search for nested members. + ## nested_members_search_depth=10 + + [[[users]]] + + # Base filter for searching for users + ## user_filter="objectclass=*" + + # The username attribute in the LDAP schema + ## user_name_attr=sAMAccountName + + [[[groups]]] + + # Base filter for searching for groups + ## group_filter="objectclass=*" + + # The group name attribute in the LDAP schema + ## group_name_attr=cn + + # The attribute of the group object which identifies the members of the group + ## group_member_attr=members + + # Configuration options for specifying the Desktop Database. For more info, + # see http://docs.djangoproject.com/en/1.1/ref/settings/#database-engine + # ------------------------------------------------------------------------ + [[database]] + engine=sqlite3 + name=/var/lib/hue/desktop.db + # Database engine is typically one of: + # postgresql_psycopg2, mysql, or sqlite3 + # + # Note that for sqlite3, 'name', below is a filename; + # for other backends, it is the database name. + ## engine=sqlite3 + ## host= + ## port= + ## user= + ## password= + ## name=desktop/desktop.db + ## options={} + + + # Configuration options for specifying the Desktop session. + # For more info, see https://docs.djangoproject.com/en/1.4/topics/http/sessions/ + # ------------------------------------------------------------------------ + [[session]] + # The cookie containing the users' session ID will expire after this amount of time in seconds. + ## ttl=60*60*24*14 + + # The cookie containing the users' session ID will be secure. + # Should only be enabled with HTTPS. 
+ ## secure=false + + + # Configuration options for connecting to an external SMTP server + # ------------------------------------------------------------------------ + [[smtp]] + + # The SMTP server information for email notification delivery + host=localhost + port=25 + user= + password= + + # Whether to use a TLS (secure) connection when talking to the SMTP server + tls=no + + # Default email address to use for various automated notification from Hue + ## default_from_email=hue@localhost + + + # Configuration options for Kerberos integration for secured Hadoop clusters + # ------------------------------------------------------------------------ + [[kerberos]] + + # Path to Hue's Kerberos keytab file + ## hue_keytab= + # Kerberos principal name for Hue + ## hue_principal=hue/hostname.foo.com + # Path to kinit + ## kinit_path=/path/to/kinit + + + # Configuration options for using OAuthBackend login + # ------------------------------------------------------------------------ + [[oauth]] + # The Consumer key of the application + ## consumer_key=XXXXXXXXXXXXXXXXXXXXX + + # The Consumer secret of the application + ## consumer_secret=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX + + # The Request token URL + ## request_token_url=https://api.twitter.com/oauth/request_token + + # The Access token URL + ## access_token_url=https://api.twitter.com/oauth/access_token + + # The Authorize URL + ## authenticate_url=https://api.twitter.com/oauth/authorize + + +########################################################################### +# Settings to configure SAML +########################################################################### + +[libsaml] + # Xmlsec1 binary path. This program should be executable by the user running Hue. + ## xmlsec_binary=/usr/local/bin/xmlsec1 + + # Entity ID for Hue acting as service provider. + # Can also accept a pattern where '' will be replaced with server URL base. + ## entity_id="/saml2/metadata/" + + # Create users from SSO on login. + ## create_users_on_login=true + + # Required attributes to ask for from IdP. + # This requires a comma separated list. + ## required_attributes=uid + + # Optional attributes to ask for from IdP. + # This requires a comma separated list. + ## optional_attributes= + + # IdP metadata in the form of a file. This is generally an XML file containing metadata that the Identity Provider generates. + ## metadata_file= + + # Private key to encrypt metadata with. + ## key_file= + + # Signed certificate to send along with encrypted metadata. + ## cert_file= + + # A mapping from attributes in the response from the IdP to django user attributes. + ## user_attribute_mapping={'uid':'username'} + + # Have Hue initiated authn requests be signed and provide a certificate. + ## authn_requests_signed=false + + # Have Hue initiated logout requests be signed and provide a certificate. + ## logout_requests_signed=false + + ## Username can be sourced from 'attributes' or 'nameid'. + ## username_source=attributes + + # Performs the logout or not. + ## logout_enabled=true + + +########################################################################### +# Settings to configure your Hadoop cluster. 
+########################################################################### + +[hadoop] + + # Configuration for HDFS NameNode + # ------------------------------------------------------------------------ + [[hdfs_clusters]] + # HA support by using HttpFs + + [[[default]]] + # Enter the filesystem uri + fs_defaultfs=hdfs://localhost:8020 + + # Change this if your HDFS cluster is Kerberos-secured + ## security_enabled=false + + # NameNode logical name. + ## logical_name= + + # Use WebHdfs/HttpFs as the communication mechanism. + # This should be the web service root URL, such as + # http://namenode:50070/webhdfs/v1 + ## webhdfs_url= + webhdfs_url=http://hc1nn:50070/webhdfs/v1/ + + # Settings about this HDFS cluster. If you install HDFS in a + # different location, you need to set the following. + + # Defaults to $HADOOP_HDFS_HOME or /usr/lib/hadoop-hdfs + hadoop_hdfs_home=/usr/lib/hadoop-hdfs + + # Defaults to $HADOOP_BIN or /usr/bin/hadoop + hadoop_bin=/usr/bin/hadoop + + # Defaults to $HADOOP_CONF_DIR or /etc/hadoop/conf + hadoop_conf_dir=/etc/hadoop/conf + + # Configuration for MapReduce 0.20 JobTracker (MR1) + # ------------------------------------------------------------------------ + [[mapred_clusters]] + + [[[default]]] + # Enter the host on which you are running the Hadoop JobTracker + #jobtracker_host=hc1nn + # The port where the JobTracker IPC listens on + #jobtracker_port=8032 + # Thrift plug-in port for the JobTracker + ## thrift_port=9290 + # Whether to submit jobs to this cluster + submit_to=False + + # JobTracker logical name. + ## logical_name= + + # Change this if your MapReduce cluster is Kerberos-secured + ## security_enabled=false + + # Settings about this MR1 cluster. If you install MR1 in a + # different location, you need to set the following. + + # Defaults to $HADOOP_MR1_HOME or /usr/lib/hadoop-0.20-mapreduce + ## hadoop_mapred_home=/usr/lib/hadoop-0.20-mapreduce + + # Defaults to $HADOOP_BIN or /usr/bin/hadoop + ## hadoop_bin=/usr/bin/hadoop + + # Defaults to $HADOOP_CONF_DIR or /etc/hadoop/conf + ## hadoop_conf_dir=/etc/hadoop/conf + + # HA support by specifying multiple clusters + # e.g. + + # [[[ha]]] + # Enter the host on which you are running the failover JobTracker + # jobtracker_host=localhost-ha + + + # Configuration for YARN (MR2) + # ------------------------------------------------------------------------ + [[yarn_clusters]] + + [[[default]]] + # Enter the host on which you are running the ResourceManager + resourcemanager_host=hc1nn + # The port where the ResourceManager IPC listens on + resourcemanager_port=8032 + # Whether to submit jobs to this cluster + submit_to=True + + # Change this if your YARN cluster is Kerberos-secured + ## security_enabled=false + + # Settings about this MR2 cluster. If you install MR2 in a + # different location, you need to set the following. 
+ + # Defaults to $HADOOP_MR2_HOME or /usr/lib/hadoop-mapreduce + ## hadoop_mapred_home=/usr/lib/hadoop-mapreduce + + # Defaults to $HADOOP_BIN or /usr/bin/hadoop + ## hadoop_bin=/usr/bin/hadoop + + # Defaults to $HADOOP_CONF_DIR or /etc/hadoop/conf + ## hadoop_conf_dir=/etc/hadoop/conf + + # URL of the ResourceManager API + resourcemanager_api_url=http://localhost:8088 + + # URL of the ProxyServer API + proxy_api_url=http://localhost:8088 + + # URL of the HistoryServer API + history_server_api_url=http://localhost:19888 + + # URL of the NodeManager API + node_manager_api_url=http://localhost:8042 + + +########################################################################### +# Settings to configure liboozie +########################################################################### + +[liboozie] + # The URL where the Oozie service runs on. This is required in order for + # users to submit jobs. + oozie_url=http://localhost:11000/oozie + + # Requires FQDN in oozie_url if enabled + ## security_enabled=false + + # Location on HDFS where the workflows/coordinator are deployed when submitted. + ## remote_deployement_dir=/user/hue/oozie/deployments + + +########################################################################### +# Settings to configure the Oozie app +########################################################################### + +[oozie] + # Location on local FS where the examples are stored. + ## local_data_dir=..../examples + + # Location on local FS where the data for the examples is stored. + ## sample_data_dir=...thirdparty/sample_data + + # Location on HDFS where the oozie examples and workflows are stored. + ## remote_data_dir=/user/hue/oozie/workspaces + + # Share workflows and coordinators information with all users. If set to false, + # they will be visible only to the owner and administrators. + ## share_jobs=True + + # Maximum of Oozie workflows or coodinators to retrieve in one API call. + ## oozie_jobs_count=100 + + +########################################################################### +# Settings to configure Beeswax +########################################################################### + +[beeswax] + + # Host where Beeswax server Thrift daemon is running. + # If Kerberos security is enabled, the fully-qualified domain name (FQDN) is + # required, even if the Thrift daemon is running on the same host as Hue. + ## beeswax_server_host= + + # The type of Thrift interface used for contacting the backend for sending + # queries/metadata requests. + # Choices are 'beeswax' (default), 'hiveserver2'. + ## server_interface=beeswax + + # Port where Beeswax Thrift server runs on. + # Use 10000 when using the HiveServer2 interface. + ## beeswax_server_port=8002 + + # Host where internal metastore Thrift daemon is running. + ## beeswax_meta_server_host=localhost + + # Configure the port the internal metastore daemon runs on. + # Used only if hive.metastore.local is true. + ## beeswax_meta_server_port=8003 + + # Hive home directory + ## hive_home_dir=/usr/lib/hive + + # Hive configuration directory, where hive-site.xml is located + hive_conf_dir=/etc/hive/conf + + # Timeout in seconds for thrift calls to beeswax service + ## beeswax_server_conn_timeout=120 + + # Timeout in seconds for thrift calls to the hive metastore + ## metastore_conn_timeout=10 + + # Maximum Java heapsize (in megabytes) used by Beeswax Server. + # Note that the setting of HADOOP_HEAPSIZE in $HADOOP_CONF_DIR/hadoop-env.sh + # may override this setting. 
+ ## beeswax_server_heapsize=1000 + + # Share saved queries with all users. If set to false, saved queries are + # visible only to the owner and administrators. + ## share_saved_queries=true + + # Time in milliseconds for Beeswax to persist queries in its cache. + # 7*24*60*60*1000 = 1 week + ## beeswax_running_query_lifetime=604800000L + + # Set a LIMIT clause when browsing a partitioned table. + # A positive value will be set as the LIMIT. If 0 or negative, do not set any limit. + ## browse_partitioned_table_limit=250 + + +########################################################################### +# Settings to configure Pig +########################################################################### + +[pig] + # Location of piggybank.jar on local filesystem. + ## local_sample_dir=/usr/share/hue/apps/pig/examples + + # Location piggybank.jar will be copied to in HDFS. + ## remote_data_dir=/user/hue/pig/examples + + +########################################################################### +# Settings to configure Sqoop +########################################################################### + +[sqoop] + # Sqoop server URL + server_url=http://hc1r1m1:12000/sqoop + + +########################################################################### +# Settings to configure Proxy +########################################################################### + +[proxy] + # Comma-separated list of regular expressions, + # which match 'host:port' of requested proxy target. + ## whitelist=(localhost|127\.0\.0\.1):(50030|50070|50060|50075) + + # Comma-separated list of regular expressions, + # which match any prefix of 'host:port/path' of requested proxy target. + # This does not support matching GET parameters. + ## blacklist=() + + +########################################################################### +# Settings to configure Impala +########################################################################### + +[impala] + # Host of the Impala Server (one of the Impalad) + server_host=hc1r1m1 + + # The backend to contact for queries/metadata requests. + # Choices are 'beeswax' or 'hiveserver2' (default). + # 'hiveserver2' supports log, progress information, query cancelation + # 'beeswax' requires Beeswax to run for proxying the metadata requests + ## server_interface=hiveserver2 + + # Port of the Impala Server + # Default is 21050 as HiveServer2 Thrift interface is the default. + # Use 21000 when using Beeswax Thrift interface. + ## server_port=21050 + + # Kerberos principal + ## impala_principal=impala/hostname.foo.com + + # Turn on/off impersonation mechanism when talking to Impala + ## impersonation_enabled=False + + +########################################################################### +# Settings to configure Hbase +########################################################################### + +[hbase] + # Comma-separated list of HBase Thrift servers for + # clusters in the format of '(name|host:port)'. + hbase_clusters=(Cluster|hc1r1m1:9090) + + # Hard limit of rows or columns per row fetched before truncating. 
+ ## truncate_limit = 500 + + +########################################################################### +# Settings to configure Solr Search +########################################################################### + +[search] + + # URL of the Solr Server + solr_url=http://hc1nn:8983/solr/ + + # Requires FQDN in solr_url if enabled + security_enabled=false + + ## Query sent when no term is entered + empty_query=*:* + + +########################################################################### +# Settings to configure Job Designer +########################################################################### + +[jobsub] + # Location on HDFS where the jobsub examples and templates are stored. + ## remote_data_dir=/user/hue/jobsub + + # Location on local FS where examples and template are stored. + ## local_data_dir=..../data + + # Location on local FS where sample data is stored + ## sample_data_dir=...thirdparty/sample_data + + +########################################################################### +# Settings to configure Job Browser. +########################################################################### + +[jobbrowser] + # Share submitted jobs information with all users. If set to false, + # submitted jobs are visible only to the owner and administrators. + ## share_jobs=true + + +########################################################################### +# Settings to configure the Shell application +########################################################################### + +[shell] + # The shell_buffer_amount specifies the number of bytes of output per shell + # that the Shell app will keep in memory. If not specified, it defaults to + # 524288 (512 MiB). + ## shell_buffer_amount=100 + + # If you run Hue against a Hadoop cluster with Kerberos security enabled, the + # Shell app needs to acquire delegation tokens for the subprocesses to work + # correctly. These delegation tokens are stored as temporary files in some + # directory. You can configure this directory here. If not specified, it + # defaults to /tmp/hue_delegation_tokens. + ## shell_delegation_token_dir=/tmp/hue_delegation_tokens + + [[ shelltypes ]] + + # Define and configure a new shell type "pig" + # ------------------------------------------------------------------------ + [[[ pig ]]] + nice_name = "Pig Shell (Grunt)" + command = "/usr/bin/pig -l /dev/null" + help = "The command-line interpreter for Pig" + + [[[[ environment ]]]] + # You can specify environment variables for the Pig shell + # in this section. Note that JAVA_HOME must be configured + # for the Pig shell to run. + + [[[[[ JAVA_HOME ]]]]] + value = "/usr/lib/jvm/java-6-sun" + + # Define and configure a new shell type "Sqoop 2" + # ------------------------------------------------------------------------ + [[[ sqoop2 ]]] + nice_name = "Sqoop 2 Shell" + command = "/usr/bin/sqoop2" + help = "The command-line Sqoop 2 client." + + [[[[ environment ]]]] + # You can configure environment variables for the Sqoop 2 shell + # in this section. + + # Define and configure a new shell type "hbase" + # ------------------------------------------------------------------------ + [[[ hbase ]]] + nice_name = "HBase Shell" + command = "/usr/bin/hbase shell" + help = "The command-line HBase client interface." + + [[[[ environment ]]]] + # You can configure environment variables for the HBase shell + # in this section. 
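Most of the values set in this hue.ini point Hue at other services: webhdfs_url and the YARN ResourceManager URLs under [hadoop], the Oozie and Sqoop 2 server URLs, and the HBase Thrift server under [hbase]. It can save time to confirm those endpoints answer before debugging through the Hue UI; a minimal sketch, assuming the hosts and ports configured above:

#!/bin/bash

# WebHDFS REST call matching webhdfs_url (NameNode web port 50070 on hc1nn)
curl -s "http://hc1nn:50070/webhdfs/v1/?op=LISTSTATUS&user.name=hue" | head

# YARN ResourceManager REST API matching resourcemanager_api_url
curl -s http://localhost:8088/ws/v1/cluster/info

# Restart Hue so edits to hue.ini are picked up, then browse http://hc1nn:8888
sudo service hue restart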
+ + +########################################################################### +# Settings for the User Admin application +########################################################################### + +[useradmin] + # The name of the default user group that users will be a member of + ## default_user_group=default diff --git a/package/chapter 7/nagios/cgi.cfg b/package/chapter 7/nagios/cgi.cfg new file mode 100644 index 0000000..4f0364e --- /dev/null +++ b/package/chapter 7/nagios/cgi.cfg @@ -0,0 +1,364 @@ +################################################################# +# +# CGI.CFG - Sample CGI Configuration File for Nagios 3.5.1 +# +# Last Modified: 06-17-2009 +# +################################################################# + + +# MAIN CONFIGURATION FILE +# This tells the CGIs where to find your main configuration file. +# The CGIs will read the main and host config files for any other +# data they might need. + +main_config_file=/etc/nagios/nagios.cfg + + + +# PHYSICAL HTML PATH +# This is the path where the HTML files for Nagios reside. This +# value is used to locate the logo images needed by the statusmap +# and statuswrl CGIs. + +physical_html_path=/usr/share/nagios/html + + + +# URL HTML PATH +# This is the path portion of the URL that corresponds to the +# physical location of the Nagios HTML files (as defined above). +# This value is used by the CGIs to locate the online documentation +# and graphics. If you access the Nagios pages with an URL like +# http://www.myhost.com/nagios, this value should be '/nagios' +# (without the quotes). + +url_html_path=/nagios + + + +# CONTEXT-SENSITIVE HELP +# This option determines whether or not a context-sensitive +# help icon will be displayed for most of the CGIs. +# Values: 0 = disables context-sensitive help +# 1 = enables context-sensitive help + +show_context_help=0 + + + +# PENDING STATES OPTION +# This option determines what states should be displayed in the web +# interface for hosts/services that have not yet been checked. +# Values: 0 = leave hosts/services that have not been check yet in their original state +# 1 = mark hosts/services that have not been checked yet as PENDING + +use_pending_states=1 + + + + +# AUTHENTICATION USAGE +# This option controls whether or not the CGIs will use any +# authentication when displaying host and service information, as +# well as committing commands to Nagios for processing. +# +# Read the HTML documentation to learn how the authorization works! +# +# NOTE: It is a really *bad* idea to disable authorization, unless +# you plan on removing the command CGI (cmd.cgi)! Failure to do +# so will leave you wide open to kiddies messing with Nagios and +# possibly hitting you with a denial of service attack by filling up +# your drive by continuously writing to your command file! +# +# Setting this value to 0 will cause the CGIs to *not* use +# authentication (bad idea), while any other value will make them +# use the authentication functions (the default). + +use_authentication=1 + + + + +# x509 CERT AUTHENTICATION +# When enabled, this option allows you to use x509 cert (SSL) +# authentication in the CGIs. This is an advanced option and should +# not be enabled unless you know what you're doing. + +use_ssl_authentication=0 + + + + +# DEFAULT USER +# Setting this variable will define a default user name that can +# access pages without authentication. This allows people within a +# secure domain (i.e., behind a firewall) to see the current status +# without authenticating. 
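Because use_authentication=1, the CGIs only act on the authorized_for_* directives that follow for users the web server has already authenticated. The Apache side of that is outside this file; a typical setup, assuming the Apache config shipped with the nagios packages and its password file at /etc/nagios/passwd:

#!/bin/bash

# Create the password file and the nagiosadmin web user (drop -c if the file already exists)
sudo htpasswd -c /etc/nagios/passwd nagiosadmin

# Reload Apache, then log in at http://hc1nn/nagios/
sudo service httpd reload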
You may want to use this to avoid basic +# authentication if you are not using a secure server since basic +# authentication transmits passwords in the clear. +# +# Important: Do not define a default username unless you are +# running a secure web server and are sure that everyone who has +# access to the CGIs has been authenticated in some manner! If you +# define this variable, anyone who has not authenticated to the web +# server will inherit all rights you assign to this user! + +#default_user_name=guest + + + +# SYSTEM/PROCESS INFORMATION ACCESS +# This option is a comma-delimited list of all usernames that +# have access to viewing the Nagios process information as +# provided by the Extended Information CGI (extinfo.cgi). By +# default, *no one* has access to this unless you choose to +# not use authorization. You may use an asterisk (*) to +# authorize any user who has authenticated to the web server. + +authorized_for_system_information=nagiosadmin + + + +# CONFIGURATION INFORMATION ACCESS +# This option is a comma-delimited list of all usernames that +# can view ALL configuration information (hosts, commands, etc). +# By default, users can only view configuration information +# for the hosts and services they are contacts for. You may use +# an asterisk (*) to authorize any user who has authenticated +# to the web server. + +authorized_for_configuration_information=nagiosadmin + + + +# SYSTEM/PROCESS COMMAND ACCESS +# This option is a comma-delimited list of all usernames that +# can issue shutdown and restart commands to Nagios via the +# command CGI (cmd.cgi). Users in this list can also change +# the program mode to active or standby. By default, *no one* +# has access to this unless you choose to not use authorization. +# You may use an asterisk (*) to authorize any user who has +# authenticated to the web server. + +authorized_for_system_commands=nagiosadmin + + + +# GLOBAL HOST/SERVICE VIEW ACCESS +# These two options are comma-delimited lists of all usernames that +# can view information for all hosts and services that are being +# monitored. By default, users can only view information +# for hosts or services that they are contacts for (unless you +# you choose to not use authorization). You may use an asterisk (*) +# to authorize any user who has authenticated to the web server. + + +authorized_for_all_services=nagiosadmin +authorized_for_all_hosts=nagiosadmin + + + +# GLOBAL HOST/SERVICE COMMAND ACCESS +# These two options are comma-delimited lists of all usernames that +# can issue host or service related commands via the command +# CGI (cmd.cgi) for all hosts and services that are being monitored. +# By default, users can only issue commands for hosts or services +# that they are contacts for (unless you you choose to not use +# authorization). You may use an asterisk (*) to authorize any +# user who has authenticated to the web server. + +authorized_for_all_service_commands=nagiosadmin +authorized_for_all_host_commands=nagiosadmin + + + +# READ-ONLY USERS +# A comma-delimited list of usernames that have read-only rights in +# the CGIs. This will block any service or host commands normally shown +# on the extinfo CGI pages. It will also block comments from being shown +# to read-only users. + +#authorized_for_read_only=user1,user2 + + + + +# STATUSMAP BACKGROUND IMAGE +# This option allows you to specify an image to be used as a +# background in the statusmap CGI. It is assumed that the image +# resides in the HTML images path (i.e. /usr/local/nagios/share/images). 
+# This path is automatically determined by appending "/images" +# to the path specified by the 'physical_html_path' directive. +# Note: The image file may be in GIF, PNG, JPEG, or GD2 format. +# However, I recommend that you convert your image to GD2 format +# (uncompressed), as this will cause less CPU load when the CGI +# generates the image. + +#statusmap_background_image=smbackground.gd2 + + + + +# STATUSMAP TRANSPARENCY INDEX COLOR +# These options set the r,g,b values of the background color used the statusmap CGI, +# so normal browsers that can't show real png transparency set the desired color as +# a background color instead (to make it look pretty). +# Defaults to white: (R,G,B) = (255,255,255). + +#color_transparency_index_r=255 +#color_transparency_index_g=255 +#color_transparency_index_b=255 + + + + +# DEFAULT STATUSMAP LAYOUT METHOD +# This option allows you to specify the default layout method +# the statusmap CGI should use for drawing hosts. If you do +# not use this option, the default is to use user-defined +# coordinates. Valid options are as follows: +# 0 = User-defined coordinates +# 1 = Depth layers +# 2 = Collapsed tree +# 3 = Balanced tree +# 4 = Circular +# 5 = Circular (Marked Up) + +default_statusmap_layout=5 + + + +# DEFAULT STATUSWRL LAYOUT METHOD +# This option allows you to specify the default layout method +# the statuswrl (VRML) CGI should use for drawing hosts. If you +# do not use this option, the default is to use user-defined +# coordinates. Valid options are as follows: +# 0 = User-defined coordinates +# 2 = Collapsed tree +# 3 = Balanced tree +# 4 = Circular + +default_statuswrl_layout=4 + + + +# STATUSWRL INCLUDE +# This option allows you to include your own objects in the +# generated VRML world. It is assumed that the file +# resides in the HTML path (i.e. /usr/local/nagios/share). + +#statuswrl_include=myworld.wrl + + + +# PING SYNTAX +# This option determines what syntax should be used when +# attempting to ping a host from the WAP interface (using +# the statuswml CGI. You must include the full path to +# the ping binary, along with all required options. The +# $HOSTADDRESS$ macro is substituted with the address of +# the host before the command is executed. +# Please note that the syntax for the ping binary is +# notorious for being different on virtually ever *NIX +# OS and distribution, so you may have to tweak this to +# work on your system. + +ping_syntax=/bin/ping -n -U -c 5 $HOSTADDRESS$ + + + +# REFRESH RATE +# This option allows you to specify the refresh rate in seconds +# of various CGIs (status, statusmap, extinfo, and outages). + +refresh_rate=90 + +# DEFAULT PAGE LIMIT +# This option allows you to specify the default number of results +# displayed on the status.cgi. This number can be adjusted from +# within the UI after the initial page load. Setting this to 0 +# will show all results. + +result_limit=100 + + +# ESCAPE HTML TAGS +# This option determines whether HTML tags in host and service +# status output is escaped in the web interface. If enabled, +# your plugin output will not be able to contain clickable links. + +escape_html_tags=1 + + + + +# SOUND OPTIONS +# These options allow you to specify an optional audio file +# that should be played in your browser window when there are +# problems on the network. The audio files are used only in +# the status CGI. Only the sound for the most critical problem +# will be played. 
Order of importance (higher to lower) is as +# follows: unreachable hosts, down hosts, critical services, +# warning services, and unknown services. If there are no +# visible problems, the sound file optionally specified by +# 'normal_sound' variable will be played. +# +# +# = +# +# Note: All audio files must be placed in the /media subdirectory +# under the HTML path (i.e. /usr/local/nagios/share/media/). + +#host_unreachable_sound=hostdown.wav +#host_down_sound=hostdown.wav +#service_critical_sound=critical.wav +#service_warning_sound=warning.wav +#service_unknown_sound=warning.wav +#normal_sound=noproblem.wav + + + +# URL TARGET FRAMES +# These options determine the target frames in which notes and +# action URLs will open. + +action_url_target=_blank +notes_url_target=_blank + + + + +# LOCK AUTHOR NAMES OPTION +# This option determines whether users can change the author name +# when submitting comments, scheduling downtime. If disabled, the +# author names will be locked into their contact name, as defined in Nagios. +# Values: 0 = allow editing author names +# 1 = lock author names (disallow editing) + +lock_author_names=1 + + + + +# SPLUNK INTEGRATION OPTIONS +# These options allow you to enable integration with Splunk +# in the web interface. If enabled, you'll be presented with +# "Splunk It" links in various places in the CGIs (log file, +# alert history, host/service detail, etc). Useful if you're +# trying to research why a particular problem occurred. +# For more information on Splunk, visit http://www.splunk.com/ + +# This option determines whether the Splunk integration is enabled +# Values: 0 = disable Splunk integration +# 1 = enable Splunk integration + +#enable_splunk_integration=1 + + +# This option should be the URL used to access your instance of Splunk + +#splunk_url=http://127.0.0.1:8000/ + + + diff --git a/package/chapter 7/nagios/conf.d/hc1nn.cfg b/package/chapter 7/nagios/conf.d/hc1nn.cfg new file mode 100644 index 0000000..68e55e8 --- /dev/null +++ b/package/chapter 7/nagios/conf.d/hc1nn.cfg @@ -0,0 +1,74 @@ +define host { + use linux-server + host_name hc1nn + alias hc1nn + address 192.168.1.107 + } + +define service { + use generic-service + host_name hc1nn + service_description PING + check_command check_ping!100.0,20%!500.0,60% + } + +define service { + use generic-service + host_name hc1nn + service_description SSH + check_command check_ssh + notifications_enabled 0 + } + +define service { + use generic-service + host_name hc1nn + service_description Current Load + check_command check_local_load!5.0,4.0,3.0!10.0,6.0,4.0 + } + +##### extra checks + +define service{ + use local-service + host_name hc1nn + service_description Root Partition + check_command check_local_disk!90%!10%!/ + } + +define service{ + use local-service + host_name hc1nn + service_description Current Users + check_command check_local_users!20!50 + } + +define service{ + use local-service + host_name hc1nn + service_description Total Processes + check_command check_local_procs!250!400!RSZDT + } + +define service{ + use local-service + host_name hc1nn + service_description Current Load + check_command check_local_load!5.0,4.0,3.0!10.0,6.0,4.0 + } + +define service{ + use local-service + host_name hc1nn + service_description Swap Usage + check_command check_local_swap!20!10 + } + +define service{ + use local-service + host_name hc1nn + service_description SSH + check_command check_ssh + notifications_enabled 0 + } + diff --git a/package/chapter 7/nagios/nagios.cfg b/package/chapter 
7/nagios/nagios.cfg new file mode 100644 index 0000000..ddf12ac --- /dev/null +++ b/package/chapter 7/nagios/nagios.cfg @@ -0,0 +1,1353 @@ +############################################################################## +# +# NAGIOS.CFG - Sample Main Config File for Nagios 3.5.1 +# +# Read the documentation for more information on this configuration +# file. I've provided some comments here, but things may not be so +# clear without further explanation. +# +# Last Modified: 12-14-2008 +# +############################################################################## + + +# LOG FILE +# This is the main log file where service and host events are logged +# for historical purposes. This should be the first option specified +# in the config file!!! + +log_file=/var/log/nagios/nagios.log + + + +# OBJECT CONFIGURATION FILE(S) +# These are the object configuration files in which you define hosts, +# host groups, contacts, contact groups, services, etc. +# You can split your object definitions across several config files +# if you wish (as shown below), or keep them all in a single config file. + +# You can specify individual object config files as shown below: +cfg_file=/etc/nagios/objects/commands.cfg +cfg_file=/etc/nagios/objects/contacts.cfg +cfg_file=/etc/nagios/objects/timeperiods.cfg +cfg_file=/etc/nagios/objects/templates.cfg + +# Definitions for monitoring the local (Linux) host +#cfg_file=/etc/nagios/objects/localhost.cfg + +# Definitions for monitoring a Windows machine +#cfg_file=/etc/nagios/objects/windows.cfg + +# Definitions for monitoring a router/switch +#cfg_file=/etc/nagios/objects/switch.cfg + +# Definitions for monitoring a network printer +#cfg_file=/etc/nagios/objects/printer.cfg + + +# You can also tell Nagios to process all config files (with a .cfg +# extension) in a particular directory by using the cfg_dir +# directive as shown below: + +#cfg_dir=/etc/nagios/servers +#cfg_dir=/etc/nagios/printers +#cfg_dir=/etc/nagios/switches +#cfg_dir=/etc/nagios/routers + +cfg_dir=/etc/nagios/conf.d + + + + +# OBJECT CACHE FILE +# This option determines where object definitions are cached when +# Nagios starts/restarts. The CGIs read object definitions from +# this cache file (rather than looking at the object config files +# directly) in order to prevent inconsistencies that can occur +# when the config files are modified after Nagios starts. + +object_cache_file=/var/log/nagios/objects.cache + + + +# PRE-CACHED OBJECT FILE +# This options determines the location of the precached object file. +# If you run Nagios with the -p command line option, it will preprocess +# your object configuration file(s) and write the cached config to this +# file. You can then start Nagios with the -u option to have it read +# object definitions from this precached file, rather than the standard +# object configuration files (see the cfg_file and cfg_dir options above). +# Using a precached object file can speed up the time needed to (re)start +# the Nagios process if you've got a large and/or complex configuration. +# Read the documentation section on optimizing Nagios to find our more +# about how this feature works. + +precached_object_file=/var/log/nagios/objects.precache + + + +# RESOURCE FILE +# This is an optional resource file that contains $USERx$ macro +# definitions. Multiple resource files can be specified by using +# multiple resource_file definitions. 
The CGIs will not attempt to +# read the contents of resource files, so information that is +# considered to be sensitive (usernames, passwords, etc) can be +# defined as macros in this file and restrictive permissions (600) +# can be placed on this file. + +resource_file=/etc/nagios/private/resource.cfg + + + +# STATUS FILE +# This is where the current status of all monitored services and +# hosts is stored. Its contents are read and processed by the CGIs. +# The contents of the status file are deleted every time Nagios +# restarts. + +status_file=/var/log/nagios/status.dat + + + +# STATUS FILE UPDATE INTERVAL +# This option determines the frequency (in seconds) that +# Nagios will periodically dump program, host, and +# service status data. + +status_update_interval=10 + + + +# NAGIOS USER +# This determines the effective user that Nagios should run as. +# You can either supply a username or a UID. + +nagios_user=nagios + + + +# NAGIOS GROUP +# This determines the effective group that Nagios should run as. +# You can either supply a group name or a GID. + +nagios_group=nagios + + + +# EXTERNAL COMMAND OPTION +# This option allows you to specify whether or not Nagios should check +# for external commands (in the command file defined below). By default +# Nagios will *not* check for external commands, just to be on the +# cautious side. If you want to be able to use the CGI command interface +# you will have to enable this. +# Values: 0 = disable commands, 1 = enable commands + +check_external_commands=1 + + + +# EXTERNAL COMMAND CHECK INTERVAL +# This is the interval at which Nagios should check for external commands. +# This value works of the interval_length you specify later. If you leave +# that at its default value of 60 (seconds), a value of 1 here will cause +# Nagios to check for external commands every minute. If you specify a +# number followed by an "s" (i.e. 15s), this will be interpreted to mean +# actual seconds rather than a multiple of the interval_length variable. +# Note: In addition to reading the external command file at regularly +# scheduled intervals, Nagios will also check for external commands after +# event handlers are executed. +# NOTE: Setting this value to -1 causes Nagios to check the external +# command file as often as possible. + +#command_check_interval=15s +command_check_interval=-1 + + + +# EXTERNAL COMMAND FILE +# This is the file that Nagios checks for external command requests. +# It is also where the command CGI will write commands that are submitted +# by users, so it must be writeable by the user that the web server +# is running as (usually 'nobody'). Permissions should be set at the +# directory level instead of on the file, as the file is deleted every +# time its contents are processed. + +command_file=/var/spool/nagios/cmd/nagios.cmd + + + +# EXTERNAL COMMAND BUFFER SLOTS +# This settings is used to tweak the number of items or "slots" that +# the Nagios daemon should allocate to the buffer that holds incoming +# external commands before they are processed. As external commands +# are processed by the daemon, they are removed from the buffer. + +external_command_buffer_slots=4096 + + + +# LOCK FILE +# This is the lockfile that Nagios will use to store its PID number +# in when it is running in daemon mode. + +lock_file=/var/run/nagios.pid + + + +# TEMP FILE +# This is a temporary file that is used as scratch space when Nagios +# updates the status log, cleans the comment file, etc. 
This file +# is created, used, and deleted throughout the time that Nagios is +# running. + +temp_file=/var/log/nagios/nagios.tmp + + + +# TEMP PATH +# This is path where Nagios can create temp files for service and +# host check results, etc. + +temp_path=/tmp + + + +# EVENT BROKER OPTIONS +# Controls what (if any) data gets sent to the event broker. +# Values: 0 = Broker nothing +# -1 = Broker everything +# = See documentation + +event_broker_options=-1 + + + +# EVENT BROKER MODULE(S) +# This directive is used to specify an event broker module that should +# by loaded by Nagios at startup. Use multiple directives if you want +# to load more than one module. Arguments that should be passed to +# the module at startup are seperated from the module path by a space. +# +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +# WARNING !!! WARNING !!! WARNING !!! WARNING !!! WARNING !!! WARNING +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +# +# Do NOT overwrite modules while they are being used by Nagios or Nagios +# will crash in a fiery display of SEGFAULT glory. This is a bug/limitation +# either in dlopen(), the kernel, and/or the filesystem. And maybe Nagios... +# +# The correct/safe way of updating a module is by using one of these methods: +# 1. Shutdown Nagios, replace the module file, restart Nagios +# 2. Delete the original module file, move the new module file into place, restart Nagios +# +# Example: +# +# broker_module= [moduleargs] + +#broker_module=/somewhere/module1.o +#broker_module=/somewhere/module2.o arg1 arg2=3 debug=0 + + + +# LOG ROTATION METHOD +# This is the log rotation method that Nagios should use to rotate +# the main log file. Values are as follows.. +# n = None - don't rotate the log +# h = Hourly rotation (top of the hour) +# d = Daily rotation (midnight every day) +# w = Weekly rotation (midnight on Saturday evening) +# m = Monthly rotation (midnight last day of month) + +log_rotation_method=d + + + +# LOG ARCHIVE PATH +# This is the directory where archived (rotated) log files should be +# placed (assuming you've chosen to do log rotation). + +log_archive_path=/var/log/nagios/archives + + + +# LOGGING OPTIONS +# If you want messages logged to the syslog facility, as well as the +# Nagios log file set this option to 1. If not, set it to 0. + +use_syslog=1 + + + +# NOTIFICATION LOGGING OPTION +# If you don't want notifications to be logged, set this value to 0. +# If notifications should be logged, set the value to 1. + +log_notifications=1 + + + +# SERVICE RETRY LOGGING OPTION +# If you don't want service check retries to be logged, set this value +# to 0. If retries should be logged, set the value to 1. + +log_service_retries=1 + + + +# HOST RETRY LOGGING OPTION +# If you don't want host check retries to be logged, set this value to +# 0. If retries should be logged, set the value to 1. + +log_host_retries=1 + + + +# EVENT HANDLER LOGGING OPTION +# If you don't want host and service event handlers to be logged, set +# this value to 0. If event handlers should be logged, set the value +# to 1. + +log_event_handlers=1 + + + +# INITIAL STATES LOGGING OPTION +# If you want Nagios to log all initial host and service states to +# the main log file (the first time the service or host is checked) +# you can enable this option by setting this value to 1. If you +# are not using an external application that does long term state +# statistics reporting, you do not need to enable this option. In +# this case, set the value to 0. 
+ +log_initial_states=0 + + + +# EXTERNAL COMMANDS LOGGING OPTION +# If you don't want Nagios to log external commands, set this value +# to 0. If external commands should be logged, set this value to 1. +# Note: This option does not include logging of passive service +# checks - see the option below for controlling whether or not +# passive checks are logged. + +log_external_commands=1 + + + +# PASSIVE CHECKS LOGGING OPTION +# If you don't want Nagios to log passive host and service checks, set +# this value to 0. If passive checks should be logged, set +# this value to 1. + +log_passive_checks=1 + + + +# GLOBAL HOST AND SERVICE EVENT HANDLERS +# These options allow you to specify a host and service event handler +# command that is to be run for every host or service state change. +# The global event handler is executed immediately prior to the event +# handler that you have optionally specified in each host or +# service definition. The command argument is the short name of a +# command definition that you define in your host configuration file. +# Read the HTML docs for more information. + +#global_host_event_handler=somecommand +#global_service_event_handler=somecommand + + + +# SERVICE INTER-CHECK DELAY METHOD +# This is the method that Nagios should use when initially +# "spreading out" service checks when it starts monitoring. The +# default is to use smart delay calculation, which will try to +# space all service checks out evenly to minimize CPU load. +# Using the dumb setting will cause all checks to be scheduled +# at the same time (with no delay between them)! This is not a +# good thing for production, but is useful when testing the +# parallelization functionality. +# n = None - don't use any delay between checks +# d = Use a "dumb" delay of 1 second between checks +# s = Use "smart" inter-check delay calculation +# x.xx = Use an inter-check delay of x.xx seconds + +service_inter_check_delay_method=s + + + +# MAXIMUM SERVICE CHECK SPREAD +# This variable determines the timeframe (in minutes) from the +# program start time that an initial check of all services should +# be completed. Default is 30 minutes. + +max_service_check_spread=30 + + + +# SERVICE CHECK INTERLEAVE FACTOR +# This variable determines how service checks are interleaved. +# Interleaving the service checks allows for a more even +# distribution of service checks and reduced load on remote +# hosts. Setting this value to 1 is equivalent to how versions +# of Nagios previous to 0.0.5 did service checks. Set this +# value to s (smart) for automatic calculation of the interleave +# factor unless you have a specific reason to change it. +# s = Use "smart" interleave factor calculation +# x = Use an interleave factor of x, where x is a +# number greater than or equal to 1. + +service_interleave_factor=s + + + +# HOST INTER-CHECK DELAY METHOD +# This is the method that Nagios should use when initially +# "spreading out" host checks when it starts monitoring. The +# default is to use smart delay calculation, which will try to +# space all host checks out evenly to minimize CPU load. +# Using the dumb setting will cause all checks to be scheduled +# at the same time (with no delay between them)! 
+# n = None - don't use any delay between checks +# d = Use a "dumb" delay of 1 second between checks +# s = Use "smart" inter-check delay calculation +# x.xx = Use an inter-check delay of x.xx seconds + +host_inter_check_delay_method=s + + + +# MAXIMUM HOST CHECK SPREAD +# This variable determines the timeframe (in minutes) from the +# program start time that an initial check of all hosts should +# be completed. Default is 30 minutes. + +max_host_check_spread=30 + + + +# MAXIMUM CONCURRENT SERVICE CHECKS +# This option allows you to specify the maximum number of +# service checks that can be run in parallel at any given time. +# Specifying a value of 1 for this variable essentially prevents +# any service checks from being parallelized. A value of 0 +# will not restrict the number of concurrent checks that are +# being executed. + +max_concurrent_checks=0 + + + +# HOST AND SERVICE CHECK REAPER FREQUENCY +# This is the frequency (in seconds!) that Nagios will process +# the results of host and service checks. + +check_result_reaper_frequency=10 + + + + +# MAX CHECK RESULT REAPER TIME +# This is the max amount of time (in seconds) that a single +# check result reaper event will be allowed to run before +# returning control back to Nagios so it can perform other +# duties. + +max_check_result_reaper_time=30 + + + + +# CHECK RESULT PATH +# This is directory where Nagios stores the results of host and +# service checks that have not yet been processed. +# +# Note: Make sure that only one instance of Nagios has access +# to this directory! + +check_result_path=/var/log/nagios/spool/checkresults + + + + +# MAX CHECK RESULT FILE AGE +# This option determines the maximum age (in seconds) which check +# result files are considered to be valid. Files older than this +# threshold will be mercilessly deleted without further processing. + +max_check_result_file_age=3600 + + + + +# CACHED HOST CHECK HORIZON +# This option determines the maximum amount of time (in seconds) +# that the state of a previous host check is considered current. +# Cached host states (from host checks that were performed more +# recently that the timeframe specified by this value) can immensely +# improve performance in regards to the host check logic. +# Too high of a value for this option may result in inaccurate host +# states being used by Nagios, while a lower value may result in a +# performance hit for host checks. Use a value of 0 to disable host +# check caching. + +cached_host_check_horizon=15 + + + +# CACHED SERVICE CHECK HORIZON +# This option determines the maximum amount of time (in seconds) +# that the state of a previous service check is considered current. +# Cached service states (from service checks that were performed more +# recently that the timeframe specified by this value) can immensely +# improve performance in regards to predictive dependency checks. +# Use a value of 0 to disable service check caching. + +cached_service_check_horizon=15 + + + +# ENABLE PREDICTIVE HOST DEPENDENCY CHECKS +# This option determines whether or not Nagios will attempt to execute +# checks of hosts when it predicts that future dependency logic test +# may be needed. These predictive checks can help ensure that your +# host dependency logic works well. 
+# Values: +# 0 = Disable predictive checks +# 1 = Enable predictive checks (default) + +enable_predictive_host_dependency_checks=1 + + + +# ENABLE PREDICTIVE SERVICE DEPENDENCY CHECKS +# This option determines whether or not Nagios will attempt to execute +# checks of service when it predicts that future dependency logic test +# may be needed. These predictive checks can help ensure that your +# service dependency logic works well. +# Values: +# 0 = Disable predictive checks +# 1 = Enable predictive checks (default) + +enable_predictive_service_dependency_checks=1 + + + +# SOFT STATE DEPENDENCIES +# This option determines whether or not Nagios will use soft state +# information when checking host and service dependencies. Normally +# Nagios will only use the latest hard host or service state when +# checking dependencies. If you want it to use the latest state (regardless +# of whether its a soft or hard state type), enable this option. +# Values: +# 0 = Don't use soft state dependencies (default) +# 1 = Use soft state dependencies + +soft_state_dependencies=0 + + + +# TIME CHANGE ADJUSTMENT THRESHOLDS +# These options determine when Nagios will react to detected changes +# in system time (either forward or backwards). + +#time_change_threshold=900 + + + +# AUTO-RESCHEDULING OPTION +# This option determines whether or not Nagios will attempt to +# automatically reschedule active host and service checks to +# "smooth" them out over time. This can help balance the load on +# the monitoring server. +# WARNING: THIS IS AN EXPERIMENTAL FEATURE - IT CAN DEGRADE +# PERFORMANCE, RATHER THAN INCREASE IT, IF USED IMPROPERLY + +auto_reschedule_checks=0 + + + +# AUTO-RESCHEDULING INTERVAL +# This option determines how often (in seconds) Nagios will +# attempt to automatically reschedule checks. This option only +# has an effect if the auto_reschedule_checks option is enabled. +# Default is 30 seconds. +# WARNING: THIS IS AN EXPERIMENTAL FEATURE - IT CAN DEGRADE +# PERFORMANCE, RATHER THAN INCREASE IT, IF USED IMPROPERLY + +auto_rescheduling_interval=30 + + + +# AUTO-RESCHEDULING WINDOW +# This option determines the "window" of time (in seconds) that +# Nagios will look at when automatically rescheduling checks. +# Only host and service checks that occur in the next X seconds +# (determined by this variable) will be rescheduled. This option +# only has an effect if the auto_reschedule_checks option is +# enabled. Default is 180 seconds (3 minutes). +# WARNING: THIS IS AN EXPERIMENTAL FEATURE - IT CAN DEGRADE +# PERFORMANCE, RATHER THAN INCREASE IT, IF USED IMPROPERLY + +auto_rescheduling_window=180 + + + +# SLEEP TIME +# This is the number of seconds to sleep between checking for system +# events and service checks that need to be run. + +sleep_time=0.25 + + + +# TIMEOUT VALUES +# These options control how much time Nagios will allow various +# types of commands to execute before killing them off. Options +# are available for controlling maximum time allotted for +# service checks, host checks, event handlers, notifications, the +# ocsp command, and performance data commands. All values are in +# seconds. + +service_check_timeout=60 +host_check_timeout=30 +event_handler_timeout=30 +notification_timeout=30 +ocsp_timeout=5 +perfdata_timeout=5 + + + +# RETAIN STATE INFORMATION +# This setting determines whether or not Nagios will save state +# information for services and hosts before it shuts down. 
Upon +# startup Nagios will reload all saved service and host state +# information before starting to monitor. This is useful for +# maintaining long-term data on state statistics, etc, but will +# slow Nagios down a bit when it (re)starts. Since its only +# a one-time penalty, I think its well worth the additional +# startup delay. + +retain_state_information=1 + + + +# STATE RETENTION FILE +# This is the file that Nagios should use to store host and +# service state information before it shuts down. The state +# information in this file is also read immediately prior to +# starting to monitor the network when Nagios is restarted. +# This file is used only if the retain_state_information +# variable is set to 1. + +state_retention_file=/var/log/nagios/retention.dat + + + +# RETENTION DATA UPDATE INTERVAL +# This setting determines how often (in minutes) that Nagios +# will automatically save retention data during normal operation. +# If you set this value to 0, Nagios will not save retention +# data at regular interval, but it will still save retention +# data before shutting down or restarting. If you have disabled +# state retention, this option has no effect. + +retention_update_interval=60 + + + +# USE RETAINED PROGRAM STATE +# This setting determines whether or not Nagios will set +# program status variables based on the values saved in the +# retention file. If you want to use retained program status +# information, set this value to 1. If not, set this value +# to 0. + +use_retained_program_state=1 + + + +# USE RETAINED SCHEDULING INFO +# This setting determines whether or not Nagios will retain +# the scheduling info (next check time) for hosts and services +# based on the values saved in the retention file. If you +# If you want to use retained scheduling info, set this +# value to 1. If not, set this value to 0. + +use_retained_scheduling_info=1 + + + +# RETAINED ATTRIBUTE MASKS (ADVANCED FEATURE) +# The following variables are used to specify specific host and +# service attributes that should *not* be retained by Nagios during +# program restarts. +# +# The values of the masks are bitwise ANDs of values specified +# by the "MODATTR_" definitions found in include/common.h. +# For example, if you do not want the current enabled/disabled state +# of flap detection and event handlers for hosts to be retained, you +# would use a value of 24 for the host attribute mask... +# MODATTR_EVENT_HANDLER_ENABLED (8) + MODATTR_FLAP_DETECTION_ENABLED (16) = 24 + +# This mask determines what host attributes are not retained +retained_host_attribute_mask=0 + +# This mask determines what service attributes are not retained +retained_service_attribute_mask=0 + +# These two masks determine what process attributes are not retained. +# There are two masks, because some process attributes have host and service +# options. For example, you can disable active host checks, but leave active +# service checks enabled. +retained_process_host_attribute_mask=0 +retained_process_service_attribute_mask=0 + +# These two masks determine what contact attributes are not retained. +# There are two masks, because some contact attributes have host and +# service options. For example, you can disable host notifications for +# a contact, but leave service notifications enabled for them. +retained_contact_host_attribute_mask=0 +retained_contact_service_attribute_mask=0 + + + +# INTERVAL LENGTH +# This is the seconds per unit interval as used in the +# host/contact/service configuration files. 
Setting this to 60 means +# that each interval is one minute long (60 seconds). Other settings +# have not been tested much, so your mileage is likely to vary... + +interval_length=60 + + + +# CHECK FOR UPDATES +# This option determines whether Nagios will automatically check to +# see if new updates (releases) are available. It is recommend that you +# enable this option to ensure that you stay on top of the latest critical +# patches to Nagios. Nagios is critical to you - make sure you keep it in +# good shape. Nagios will check once a day for new updates. Data collected +# by Nagios Enterprises from the update check is processed in accordance +# with our privacy policy - see http://api.nagios.org for details. + +check_for_updates=1 + + + +# BARE UPDATE CHECK +# This option deterines what data Nagios will send to api.nagios.org when +# it checks for updates. By default, Nagios will send information on the +# current version of Nagios you have installed, as well as an indicator as +# to whether this was a new installation or not. Nagios Enterprises uses +# this data to determine the number of users running specific version of +# Nagios. Enable this option if you do not want this information to be sent. + +bare_update_check=0 + + + +# AGGRESSIVE HOST CHECKING OPTION +# If you don't want to turn on aggressive host checking features, set +# this value to 0 (the default). Otherwise set this value to 1 to +# enable the aggressive check option. Read the docs for more info +# on what aggressive host check is or check out the source code in +# base/checks.c + +use_aggressive_host_checking=0 + + + +# SERVICE CHECK EXECUTION OPTION +# This determines whether or not Nagios will actively execute +# service checks when it initially starts. If this option is +# disabled, checks are not actively made, but Nagios can still +# receive and process passive check results that come in. Unless +# you're implementing redundant hosts or have a special need for +# disabling the execution of service checks, leave this enabled! +# Values: 1 = enable checks, 0 = disable checks + +execute_service_checks=1 + + + +# PASSIVE SERVICE CHECK ACCEPTANCE OPTION +# This determines whether or not Nagios will accept passive +# service checks results when it initially (re)starts. +# Values: 1 = accept passive checks, 0 = reject passive checks + +accept_passive_service_checks=1 + + + +# HOST CHECK EXECUTION OPTION +# This determines whether or not Nagios will actively execute +# host checks when it initially starts. If this option is +# disabled, checks are not actively made, but Nagios can still +# receive and process passive check results that come in. Unless +# you're implementing redundant hosts or have a special need for +# disabling the execution of host checks, leave this enabled! +# Values: 1 = enable checks, 0 = disable checks + +execute_host_checks=1 + + + +# PASSIVE HOST CHECK ACCEPTANCE OPTION +# This determines whether or not Nagios will accept passive +# host checks results when it initially (re)starts. +# Values: 1 = accept passive checks, 0 = reject passive checks + +accept_passive_host_checks=1 + + + +# NOTIFICATIONS OPTION +# This determines whether or not Nagios will sent out any host or +# service notifications when it is initially (re)started. +# Values: 1 = enable notifications, 0 = disable notifications + +enable_notifications=1 + + + +# EVENT HANDLER USE OPTION +# This determines whether or not Nagios will run any host or +# service event handlers when it is initially (re)started. 
Unless +# you're implementing redundant hosts, leave this option enabled. +# Values: 1 = enable event handlers, 0 = disable event handlers + +enable_event_handlers=1 + + + +# PROCESS PERFORMANCE DATA OPTION +# This determines whether or not Nagios will process performance +# data returned from service and host checks. If this option is +# enabled, host performance data will be processed using the +# host_perfdata_command (defined below) and service performance +# data will be processed using the service_perfdata_command (also +# defined below). Read the HTML docs for more information on +# performance data. +# Values: 1 = process performance data, 0 = do not process performance data + +process_performance_data=0 + + + +# HOST AND SERVICE PERFORMANCE DATA PROCESSING COMMANDS +# These commands are run after every host and service check is +# performed. These commands are executed only if the +# enable_performance_data option (above) is set to 1. The command +# argument is the short name of a command definition that you +# define in your host configuration file. Read the HTML docs for +# more information on performance data. + +#host_perfdata_command=process-host-perfdata +#service_perfdata_command=process-service-perfdata + + + +# HOST AND SERVICE PERFORMANCE DATA FILES +# These files are used to store host and service performance data. +# Performance data is only written to these files if the +# enable_performance_data option (above) is set to 1. + +#host_perfdata_file=/tmp/host-perfdata +#service_perfdata_file=/tmp/service-perfdata + + + +# HOST AND SERVICE PERFORMANCE DATA FILE TEMPLATES +# These options determine what data is written (and how) to the +# performance data files. The templates may contain macros, special +# characters (\t for tab, \r for carriage return, \n for newline) +# and plain text. A newline is automatically added after each write +# to the performance data file. Some examples of what you can do are +# shown below. + +#host_perfdata_file_template=[HOSTPERFDATA]\t$TIMET$\t$HOSTNAME$\t$HOSTEXECUTIONTIME$\t$HOSTOUTPUT$\t$HOSTPERFDATA$ +#service_perfdata_file_template=[SERVICEPERFDATA]\t$TIMET$\t$HOSTNAME$\t$SERVICEDESC$\t$SERVICEEXECUTIONTIME$\t$SERVICELATENCY$\t$SERVICEOUTPUT$\t$SERVICEPERFDATA$ + + + +# HOST AND SERVICE PERFORMANCE DATA FILE MODES +# This option determines whether or not the host and service +# performance data files are opened in write ("w") or append ("a") +# mode. If you want to use named pipes, you should use the special +# pipe ("p") mode which avoid blocking at startup, otherwise you will +# likely want the defult append ("a") mode. + +#host_perfdata_file_mode=a +#service_perfdata_file_mode=a + + + +# HOST AND SERVICE PERFORMANCE DATA FILE PROCESSING INTERVAL +# These options determine how often (in seconds) the host and service +# performance data files are processed using the commands defined +# below. A value of 0 indicates the files should not be periodically +# processed. + +#host_perfdata_file_processing_interval=0 +#service_perfdata_file_processing_interval=0 + + + +# HOST AND SERVICE PERFORMANCE DATA FILE PROCESSING COMMANDS +# These commands are used to periodically process the host and +# service performance data files. The interval at which the +# processing occurs is determined by the options above. 
+ +#host_perfdata_file_processing_command=process-host-perfdata-file +#service_perfdata_file_processing_command=process-service-perfdata-file + + + +# HOST AND SERVICE PERFORMANCE DATA PROCESS EMPTY RESULTS +# THese options determine wether the core will process empty perfdata +# results or not. This is needed for distributed monitoring, and intentionally +# turned on by default. +# If you don't require empty perfdata - saving some cpu cycles +# on unwanted macro calculation - you can turn that off. Be careful! +# Values: 1 = enable, 0 = disable + +#host_perfdata_process_empty_results=1 +#service_perfdata_process_empty_results=1 + + +# OBSESS OVER SERVICE CHECKS OPTION +# This determines whether or not Nagios will obsess over service +# checks and run the ocsp_command defined below. Unless you're +# planning on implementing distributed monitoring, do not enable +# this option. Read the HTML docs for more information on +# implementing distributed monitoring. +# Values: 1 = obsess over services, 0 = do not obsess (default) + +obsess_over_services=0 + + + +# OBSESSIVE COMPULSIVE SERVICE PROCESSOR COMMAND +# This is the command that is run for every service check that is +# processed by Nagios. This command is executed only if the +# obsess_over_services option (above) is set to 1. The command +# argument is the short name of a command definition that you +# define in your host configuration file. Read the HTML docs for +# more information on implementing distributed monitoring. + +#ocsp_command=somecommand + + + +# OBSESS OVER HOST CHECKS OPTION +# This determines whether or not Nagios will obsess over host +# checks and run the ochp_command defined below. Unless you're +# planning on implementing distributed monitoring, do not enable +# this option. Read the HTML docs for more information on +# implementing distributed monitoring. +# Values: 1 = obsess over hosts, 0 = do not obsess (default) + +obsess_over_hosts=0 + + + +# OBSESSIVE COMPULSIVE HOST PROCESSOR COMMAND +# This is the command that is run for every host check that is +# processed by Nagios. This command is executed only if the +# obsess_over_hosts option (above) is set to 1. The command +# argument is the short name of a command definition that you +# define in your host configuration file. Read the HTML docs for +# more information on implementing distributed monitoring. + +#ochp_command=somecommand + + + +# TRANSLATE PASSIVE HOST CHECKS OPTION +# This determines whether or not Nagios will translate +# DOWN/UNREACHABLE passive host check results into their proper +# state for this instance of Nagios. This option is useful +# if you have distributed or failover monitoring setup. In +# these cases your other Nagios servers probably have a different +# "view" of the network, with regards to the parent/child relationship +# of hosts. If a distributed monitoring server thinks a host +# is DOWN, it may actually be UNREACHABLE from the point of +# this Nagios instance. Enabling this option will tell Nagios +# to translate any DOWN or UNREACHABLE host states it receives +# passively into the correct state from the view of this server. +# Values: 1 = perform translation, 0 = do not translate (default) + +translate_passive_host_checks=0 + + + +# PASSIVE HOST CHECKS ARE SOFT OPTION +# This determines whether or not Nagios will treat passive host +# checks as being HARD or SOFT. By default, a passive host check +# result will put a host into a HARD state type. This can be changed +# by enabling this option. 
+# Values: 0 = passive checks are HARD, 1 = passive checks are SOFT + +passive_host_checks_are_soft=0 + + + +# ORPHANED HOST/SERVICE CHECK OPTIONS +# These options determine whether or not Nagios will periodically +# check for orphaned host service checks. Since service checks are +# not rescheduled until the results of their previous execution +# instance are processed, there exists a possibility that some +# checks may never get rescheduled. A similar situation exists for +# host checks, although the exact scheduling details differ a bit +# from service checks. Orphaned checks seem to be a rare +# problem and should not happen under normal circumstances. +# If you have problems with service checks never getting +# rescheduled, make sure you have orphaned service checks enabled. +# Values: 1 = enable checks, 0 = disable checks + +check_for_orphaned_services=1 +check_for_orphaned_hosts=1 + + + +# SERVICE FRESHNESS CHECK OPTION +# This option determines whether or not Nagios will periodically +# check the "freshness" of service results. Enabling this option +# is useful for ensuring passive checks are received in a timely +# manner. +# Values: 1 = enabled freshness checking, 0 = disable freshness checking + +check_service_freshness=1 + + + +# SERVICE FRESHNESS CHECK INTERVAL +# This setting determines how often (in seconds) Nagios will +# check the "freshness" of service check results. If you have +# disabled service freshness checking, this option has no effect. + +service_freshness_check_interval=60 + + + +# SERVICE CHECK TIMEOUT STATE +# This setting determines the state Nagios will report when a +# service check times out - that is does not respond within +# service_check_timeout seconds. This can be useful if a +# machine is running at too high a load and you do not want +# to consider a failed service check to be critical (the default). +# Valid settings are: +# c - Critical (default) +# u - Unknown +# w - Warning +# o - OK + +service_check_timeout_state=c + + + +# HOST FRESHNESS CHECK OPTION +# This option determines whether or not Nagios will periodically +# check the "freshness" of host results. Enabling this option +# is useful for ensuring passive checks are received in a timely +# manner. +# Values: 1 = enabled freshness checking, 0 = disable freshness checking + +check_host_freshness=0 + + + +# HOST FRESHNESS CHECK INTERVAL +# This setting determines how often (in seconds) Nagios will +# check the "freshness" of host check results. If you have +# disabled host freshness checking, this option has no effect. + +host_freshness_check_interval=60 + + + + +# ADDITIONAL FRESHNESS THRESHOLD LATENCY +# This setting determines the number of seconds that Nagios +# will add to any host and service freshness thresholds that +# it calculates (those not explicitly specified by the user). + +additional_freshness_latency=15 + + + + +# FLAP DETECTION OPTION +# This option determines whether or not Nagios will try +# and detect hosts and services that are "flapping". +# Flapping occurs when a host or service changes between +# states too frequently. When Nagios detects that a +# host or service is flapping, it will temporarily suppress +# notifications for that host/service until it stops +# flapping. Flap detection is very experimental, so read +# the HTML documentation before enabling this feature! 
+# Values: 1 = enable flap detection +# 0 = disable flap detection (default) + +enable_flap_detection=1 + + + +# FLAP DETECTION THRESHOLDS FOR HOSTS AND SERVICES +# Read the HTML documentation on flap detection for +# an explanation of what this option does. This option +# has no effect if flap detection is disabled. + +low_service_flap_threshold=5.0 +high_service_flap_threshold=20.0 +low_host_flap_threshold=5.0 +high_host_flap_threshold=20.0 + + + +# DATE FORMAT OPTION +# This option determines how short dates are displayed. Valid options +# include: +# us (MM-DD-YYYY HH:MM:SS) +# euro (DD-MM-YYYY HH:MM:SS) +# iso8601 (YYYY-MM-DD HH:MM:SS) +# strict-iso8601 (YYYY-MM-DDTHH:MM:SS) +# + +date_format=us + + + + +# TIMEZONE OFFSET +# This option is used to override the default timezone that this +# instance of Nagios runs in. If not specified, Nagios will use +# the system configured timezone. +# +# NOTE: In order to display the correct timezone in the CGIs, you +# will also need to alter the Apache directives for the CGI path +# to include your timezone. Example: +# +# +# SetEnv TZ "Australia/Brisbane" +# ... +# + +#use_timezone=US/Mountain +#use_timezone=Australia/Brisbane + + + + +# P1.PL FILE LOCATION +# This value determines where the p1.pl perl script (used by the +# embedded Perl interpreter) is located. If you didn't compile +# Nagios with embedded Perl support, this option has no effect. + +p1_file=/usr/sbin/p1.pl + + + +# EMBEDDED PERL INTERPRETER OPTION +# This option determines whether or not the embedded Perl interpreter +# will be enabled during runtime. This option has no effect if Nagios +# has not been compiled with support for embedded Perl. +# Values: 0 = disable interpreter, 1 = enable interpreter + +enable_embedded_perl=1 + + + +# EMBEDDED PERL USAGE OPTION +# This option determines whether or not Nagios will process Perl plugins +# and scripts with the embedded Perl interpreter if the plugins/scripts +# do not explicitly indicate whether or not it is okay to do so. Read +# the HTML documentation on the embedded Perl interpreter for more +# information on how this option works. + +use_embedded_perl_implicitly=1 + + + +# ILLEGAL OBJECT NAME CHARACTERS +# This option allows you to specify illegal characters that cannot +# be used in host names, service descriptions, or names of other +# object types. + +illegal_object_name_chars=`~!$%^&*|'"<>?,()= + + + +# ILLEGAL MACRO OUTPUT CHARACTERS +# This option allows you to specify illegal characters that are +# stripped from macros before being used in notifications, event +# handlers, etc. This DOES NOT affect macros used in service or +# host check commands. +# The following macros are stripped of the characters you specify: +# $HOSTOUTPUT$ +# $HOSTPERFDATA$ +# $HOSTACKAUTHOR$ +# $HOSTACKCOMMENT$ +# $SERVICEOUTPUT$ +# $SERVICEPERFDATA$ +# $SERVICEACKAUTHOR$ +# $SERVICEACKCOMMENT$ + +illegal_macro_output_chars=`~$&|'"<> + + + +# REGULAR EXPRESSION MATCHING +# This option controls whether or not regular expression matching +# takes place in the object config files. Regular expression +# matching is used to match host, hostgroup, service, and service +# group names/descriptions in some fields of various object types. +# Values: 1 = enable regexp matching, 0 = disable regexp matching + +use_regexp_matching=0 + + + +# "TRUE" REGULAR EXPRESSION MATCHING +# This option controls whether or not "true" regular expression +# matching takes place in the object config files. 
This option +# only has an effect if regular expression matching is enabled +# (see above). If this option is DISABLED, regular expression +# matching only occurs if a string contains wildcard characters +# (* and ?). If the option is ENABLED, regexp matching occurs +# all the time (which can be annoying). +# Values: 1 = enable true matching, 0 = disable true matching + +use_true_regexp_matching=0 + + + +# ADMINISTRATOR EMAIL/PAGER ADDRESSES +# The email and pager address of a global administrator (likely you). +# Nagios never uses these values itself, but you can access them by +# using the $ADMINEMAIL$ and $ADMINPAGER$ macros in your notification +# commands. + +#admin_email=nagios@localhost +admin_email=info@semtech-solutions.co.nz +admin_pager=pagenagios@localhost + + + +# DAEMON CORE DUMP OPTION +# This option determines whether or not Nagios is allowed to create +# a core dump when it runs as a daemon. Note that it is generally +# considered bad form to allow this, but it may be useful for +# debugging purposes. Enabling this option doesn't guarantee that +# a core file will be produced, but that's just life... +# Values: 1 - Allow core dumps +# 0 - Do not allow core dumps (default) + +daemon_dumps_core=0 + + + +# LARGE INSTALLATION TWEAKS OPTION +# This option determines whether or not Nagios will take some shortcuts +# which can save on memory and CPU usage in large Nagios installations. +# Read the documentation for more information on the benefits/tradeoffs +# of enabling this option. +# Values: 1 - Enabled tweaks +# 0 - Disable tweaks (default) + +use_large_installation_tweaks=0 + + + +# ENABLE ENVIRONMENT MACROS +# This option determines whether or not Nagios will make all standard +# macros available as environment variables when host/service checks +# and system commands (event handlers, notifications, etc.) are +# executed. Enabling this option can cause performance issues in +# large installations, as it will consume a bit more memory and (more +# importantly) consume more CPU. +# Values: 1 - Enable environment variable macros (default) +# 0 - Disable environment variable macros + +enable_environment_macros=1 + + + +# CHILD PROCESS MEMORY OPTION +# This option determines whether or not Nagios will free memory in +# child processes (processed used to execute system commands and host/ +# service checks). If you specify a value here, it will override +# program defaults. +# Value: 1 - Free memory in child processes +# 0 - Do not free memory in child processes + +#free_child_process_memory=1 + + + +# CHILD PROCESS FORKING BEHAVIOR +# This option determines how Nagios will fork child processes +# (used to execute system commands and host/service checks). Normally +# child processes are fork()ed twice, which provides a very high level +# of isolation from problems. Fork()ing once is probably enough and will +# save a great deal on CPU usage (in large installs), so you might +# want to consider using this. If you specify a value here, it will +# program defaults. +# Value: 1 - Child processes fork() twice +# 0 - Child processes fork() just once + +#child_processes_fork_twice=1 + + + +# DEBUG LEVEL +# This option determines how much (if any) debugging information will +# be written to the debug file. OR values together to log multiple +# types of information. 
+# Values: +# -1 = Everything +# 0 = Nothing +# 1 = Functions +# 2 = Configuration +# 4 = Process information +# 8 = Scheduled events +# 16 = Host/service checks +# 32 = Notifications +# 64 = Event broker +# 128 = External commands +# 256 = Commands +# 512 = Scheduled downtime +# 1024 = Comments +# 2048 = Macros + +debug_level=0 + + + +# DEBUG VERBOSITY +# This option determines how verbose the debug log out will be. +# Values: 0 = Brief output +# 1 = More detailed +# 2 = Very detailed + +debug_verbosity=1 + + + +# DEBUG FILE +# This option determines where Nagios should write debugging information. + +debug_file=/var/log/nagios/nagios.debug + + + +# MAX DEBUG FILE SIZE +# This option determines the maximum size (in bytes) of the debug file. If +# the file grows larger than this size, it will be renamed with a .old +# extension. If a file already exists with a .old extension it will +# automatically be deleted. This helps ensure your disk space usage doesn't +# get out of control when debugging Nagios. + +max_debug_file_size=1000000 + + diff --git a/package/chapter 7/nagios/nrpe.cfg b/package/chapter 7/nagios/nrpe.cfg new file mode 100644 index 0000000..cc9b783 --- /dev/null +++ b/package/chapter 7/nagios/nrpe.cfg @@ -0,0 +1,235 @@ +############################################################################# +# Sample NRPE Config File +# Written by: Ethan Galstad (nagios@nagios.org) +# +# Last Modified: 11-23-2007 +# +# NOTES: +# This is a sample configuration file for the NRPE daemon. It needs to be +# located on the remote host that is running the NRPE daemon, not the host +# from which the check_nrpe client is being executed. +############################################################################# + + +# LOG FACILITY +# The syslog facility that should be used for logging purposes. + +log_facility=daemon + + + +# PID FILE +# The name of the file in which the NRPE daemon should write it's process ID +# number. The file is only written if the NRPE daemon is started by the root +# user and is running in standalone mode. + +pid_file=/var/run/nrpe/nrpe.pid + + + +# PORT NUMBER +# Port number we should wait for connections on. +# NOTE: This must be a non-priviledged port (i.e. > 1024). +# NOTE: This option is ignored if NRPE is running under either inetd or xinetd + +server_port=5666 + + + +# SERVER ADDRESS +# Address that nrpe should bind to in case there are more than one interface +# and you do not want nrpe to bind on all interfaces. +# NOTE: This option is ignored if NRPE is running under either inetd or xinetd + +#server_address=127.0.0.1 + + + +# NRPE USER +# This determines the effective user that the NRPE daemon should run as. +# You can either supply a username or a UID. +# +# NOTE: This option is ignored if NRPE is running under either inetd or xinetd + +nrpe_user=nrpe + + + +# NRPE GROUP +# This determines the effective group that the NRPE daemon should run as. +# You can either supply a group name or a GID. +# +# NOTE: This option is ignored if NRPE is running under either inetd or xinetd + +nrpe_group=nrpe + + + +# ALLOWED HOST ADDRESSES +# This is an optional comma-delimited list of IP address or hostnames +# that are allowed to talk to the NRPE daemon. Network addresses with a bit mask +# (i.e. 192.168.1.0/24) are also supported. Hostname wildcards are not currently +# supported. +# +# Note: The daemon only does rudimentary checking of the client's IP +# address. 
I would highly recommend adding entries in your /etc/hosts.allow
+# file to allow only the specified host to connect to the port
+# you are running this daemon on.
+#
+# NOTE: This option is ignored if NRPE is running under either inetd or xinetd
+
+allowed_hosts=192.168.1.107
+
+
+
+# COMMAND ARGUMENT PROCESSING
+# This option determines whether or not the NRPE daemon will allow clients
+# to specify arguments to commands that are executed. This option only works
+# if the daemon was configured with the --enable-command-args configure script
+# option.
+#
+# *** ENABLING THIS OPTION IS A SECURITY RISK! ***
+# Read the SECURITY file for information on some of the security implications
+# of enabling this variable.
+#
+# Values: 0=do not allow arguments, 1=allow command arguments
+
+dont_blame_nrpe=1
+
+
+
+# BASH COMMAND SUBSTITUTION
+# This option determines whether or not the NRPE daemon will allow clients
+# to specify arguments that contain bash command substitutions of the form
+# $(...). This option only works if the daemon was configured with both
+# the --enable-command-args and --enable-bash-command-substitution configure
+# script options.
+#
+# *** ENABLING THIS OPTION IS A HIGH SECURITY RISK! ***
+# Read the SECURITY file for information on some of the security implications
+# of enabling this variable.
+#
+# Values: 0=do not allow bash command substitutions,
+# 1=allow bash command substitutions
+
+allow_bash_command_substitution=0
+
+
+
+# COMMAND PREFIX
+# This option allows you to prefix all commands with a user-defined string.
+# A space is automatically added between the specified prefix string and the
+# command line from the command definition.
+#
+# *** THIS EXAMPLE MAY POSE A POTENTIAL SECURITY RISK, SO USE WITH CAUTION! ***
+# Usage scenario:
+# Execute restricted commands using sudo. For this to work, you need to add
+# the nagios user to your /etc/sudoers. An example entry for allowing
+# execution of the plugins from that directory might be:
+#
+# nagios ALL=(ALL) NOPASSWD: /usr/lib/nagios/plugins/
+#
+# This lets the nagios user run all commands in that directory (and only them)
+# without asking for a password. If you do this, make sure you don't give
+# random users write access to that directory or its contents!
+
+# command_prefix=/usr/bin/sudo
+
+
+
+# DEBUGGING OPTION
+# This option determines whether or not debugging messages are logged to the
+# syslog facility.
+# Values: 0=debugging off, 1=debugging on
+
+debug=0
+
+
+
+# COMMAND TIMEOUT
+# This specifies the maximum number of seconds that the NRPE daemon will
+# allow plugins to finish executing before killing them off.
+
+command_timeout=60
+
+
+
+# CONNECTION TIMEOUT
+# This specifies the maximum number of seconds that the NRPE daemon will
+# wait for a connection to be established before exiting. This is sometimes
+# seen where a network problem stops the SSL being established even though
+# all network sessions are connected. This causes the nrpe daemons to
+# accumulate, eating system resources. Do not set this too low.
+
+connection_timeout=300
+
+
+
+# WEAK RANDOM SEED OPTION
+# This directive allows you to use SSL even if your system does not have
+# a /dev/random or /dev/urandom (on purpose or because the necessary patches
+# were not applied). The random number generator will be seeded from a file
+# which is either a file pointed to by the environment variable $RANDFILE
+# or $HOME/.rnd. If neither exists, the pseudo random number generator will
+# be initialized and a warning will be issued.
+# Values: 0=only seed from /dev/[u]random, 1=also seed from weak randomness
+
+#allow_weak_random_seed=1
+
+
+
+# INCLUDE CONFIG FILE
+# This directive allows you to include definitions from an external config file.
+
+#include=<somefile.cfg>
+
+
+
+# COMMAND DEFINITIONS
+# Command definitions that this daemon will run. Definitions
+# are in the following format:
+#
+# command[<command_name>]=<command_line>
+#
+# When the daemon receives a request to return the results of <command_name>
+# it will execute the command specified by the <command_line> argument.
+#
+# Unlike Nagios, the command line cannot contain macros - it must be
+# typed exactly as it should be executed.
+#
+# Note: Any plugins that are used in the command lines must reside
+# on the machine that this daemon is running on! The examples below
+# assume that you have plugins installed in a /usr/local/nagios/libexec
+# directory. Also note that you will have to modify the definitions below
+# to match the argument format the plugins expect. Remember, these are
+# examples only!
+
+
+# The following examples use hardcoded command arguments...
+
+command[check_users]=/usr/lib/nagios/plugins/check_users -w 5 -c 10
+command[check_load]=/usr/lib/nagios/plugins/check_load -w 15,10,5 -c 30,25,20
+command[check_root]=/usr/lib64/nagios/plugins/check_disk -w 20% -c 10% -p /
+command[check_home]=/usr/lib64/nagios/plugins/check_disk -w 20% -c 10% -p /home
+command[check_zombie_procs]=/usr/lib/nagios/plugins/check_procs -w 5 -c 10 -s Z
+command[check_total_procs]=/usr/lib/nagios/plugins/check_procs -w 150 -c 200
+
+
+# The following examples allow user-supplied arguments and can
+# only be used if the NRPE daemon was compiled with support for
+# command arguments *AND* the dont_blame_nrpe directive in this
+# config file is set to '1'. This poses a potential security risk, so
+# make sure you read the SECURITY file before doing this.
+
+#command[check_users]=/usr/lib/nagios/plugins/check_users -w $ARG1$ -c $ARG2$
+#command[check_load]=/usr/lib/nagios/plugins/check_load -w $ARG1$ -c $ARG2$
+#command[check_disk]=/usr/lib/nagios/plugins/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$
+#command[check_procs]=/usr/lib/nagios/plugins/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$
+
+
+
+# INCLUDE CONFIG DIRECTORY
+# This directive allows you to include definitions from config files (with a
+# .cfg extension) in one or more directories (with recursion).
+
+include_dir=/etc/nrpe.d/
diff --git a/package/chapter 9/hive/Hive QL.txt b/package/chapter 9/hive/Hive QL.txt
new file mode 100644
index 0000000..72064a2
--- /dev/null
+++ b/package/chapter 9/hive/Hive QL.txt
@@ -0,0 +1,192 @@
+
+--------------------------------------------------
+-- create database and use it
+--------------------------------------------------
+
+CREATE DATABASE IF NOT EXISTS trade;
+
+USE trade;
+
+--------------------------------------------------
+-- external tables
+--------------------------------------------------
+
+CREATE TABLE IF NOT EXISTS
+trade.rawtrans
+(
+dept STRING,
+entity STRING,
+paydate STRING,
+exptype STRING,
+exparea STRING,
+supplier STRING,
+trans STRING,
+amount DOUBLE
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+LOCATION '/data/uk_trade';
+
+
+--------------------------------------------------
+-- registering and using UDFs
+--------------------------------------------------
+
+add jar /home/hadoop/hive/udf/target/scala-2.10/dateconv_2.10-0.1.jar;
+
+create temporary function DateConv as 'nz.co.semtechsolutions.hive.udf.DateConv';
+
+--------------------------------------------------
+-- create table
+--------------------------------------------------
+
+CREATE TABLE IF NOT EXISTS
+trade.suppliertot
+(
+payyear INT,
+paymonth INT,
+supplier STRING,
+totamount DOUBLE
+);
+
+CREATE TABLE IF NOT EXISTS
+trade.suppliertot
+AS
+SELECT
+  year(DateConv (paydate) ) as payyear,
+  month(DateConv (paydate) ) as paymonth,
+  supplier,
+  SUM(amount) as totamount
+FROM
+  trade.rawtrans
+GROUP BY
+  year(DateConv (paydate) ) ,
+  month(DateConv (paydate) ) ,
+  supplier ;
+
+--------------------------------------------------
+-- remove table
+--------------------------------------------------
+
+DROP TABLE trade.suppliertot;
+
+--------------------------------------------------
+-- select statements
+--------------------------------------------------
+
+SELECT COUNT(*) FROM trade.rawtrans ;
+
+SELECT
+  year( DateConv (paydate) ) as payyear,
+  month(DateConv (paydate) ) as paymonth,
+  supplier,
+  SUM(amount) as totamount
+FROM
+  trade.rawtrans
+GROUP BY
+  year( DateConv (paydate) ) ,
+  month(DateConv (paydate) ) ,
+  supplier ;
+
+SELECT * FROM trade.suppliertot WHERE supplier LIKE 'UNIVERSITY%' ;
+
+--------------------------------------------------
+-- where clause
+--------------------------------------------------
+
+SELECT
+  year( DateConv (paydate) ) as payyear,
+  month(DateConv (paydate) ) as paymonth,
+  supplier,
+  SUM(amount) as totamount
+FROM
+  trade.rawtrans
+WHERE
+  supplier NOT LIKE 'UK Trade%' AND
+  supplier NOT LIKE 'Corporate%'
+GROUP BY
+  year( DateConv (paydate) ) ,
+  month(DateConv (paydate) ) ,
+  supplier ;
+
+
+--------------------------------------------------
+-- sub query
+--------------------------------------------------
+
+SELECT
+  DateConv (b.paydate) as paydate,
+  b.supplier,
+  b.amount
+FROM
+  (
+    SELECT a.* FROM trade.rawtrans a WHERE a.supplier LIKE '%INDIA%'
+  ) b ;
+
+SELECT
+  DateConv (b.paydate) as paydate,
+  b.supplier,
+  b.amount
+FROM
+  trade.rawtrans b
+WHERE
+  b.supplier IN ( SELECT a.supplier FROM trade.uksupplier a ) ;
+
+--------------------------------------------------
+-- table joins
+--------------------------------------------------
+
+SELECT
+  a.dept,
+  a.supplier,
+  b.amount
+FROM
+(
+  SELECT DISTINCT
+    c.dept,c.supplier
+  FROM
+    trade.rawtrans c
+) a
+JOIN
+(
+  SELECT DISTINCT
+    d.supplier,d.amount
+  FROM
+    trade.rawtrans d
+) b
+ON ( a.supplier = b.supplier ) ;
+
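+--------------------------------------------------
+-- quick UDF sanity check (illustrative sketch, not from the book text:
+-- it assumes the jar and the DateConv temporary function registered
+-- above, and that trade.rawtrans already holds data; it shows the
+-- dd/MM/yyyy to yyyy-MM-dd conversion performed by the UDF)
+--------------------------------------------------
+
+SELECT
+  paydate,
+  DateConv (paydate) as isodate
+FROM
+  trade.rawtrans
+LIMIT 5 ;
+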
+--------------------------------------------------
+-- inserts
+--------------------------------------------------
+
+INSERT INTO TABLE trade.suppliertot
+  SELECT payyear,paymonth,'UNIVERSITY OF SEMTECH',700.0 FROM
+  trade.suppliertot WHERE supplier LIKE 'UNIVERSITY%' ;
+
+INSERT OVERWRITE TABLE trade.suppliertot
+  SELECT payyear,paymonth,'UNIVERSITY OF SEMTECH',950.0 FROM
+  trade.suppliertot WHERE supplier LIKE 'UNIVERSITY%' ;
+
+--------------------------------------------------
+-- ordering
+--------------------------------------------------
+
+SELECT supplier, COUNT(*) FROM trade.rawtrans GROUP BY supplier ORDER BY supplier DESC ;
+
+--------------------------------------------------
+-- having
+--------------------------------------------------
+
+SELECT
+  supplier, COUNT(*)
+FROM
+  trade.rawtrans
+GROUP BY
+  supplier
+HAVING COUNT(*) > 1000
+ORDER BY
+  supplier DESC ;
+
+
+
+
diff --git a/package/chapter 9/hive/udf/DateConv.java b/package/chapter 9/hive/udf/DateConv.java
new file mode 100644
index 0000000..6a66617
--- /dev/null
+++ b/package/chapter 9/hive/udf/DateConv.java
@@ -0,0 +1,36 @@
+package nz.co.semtechsolutions.hive.udf;
+
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.io.Text;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+
+public class DateConv extends UDF
+{
+
+  public Text evaluate(Text s)
+  {
+
+    Text to_value = new Text("");
+
+    if (s != null)
+    {
+      try
+      {
+
+        SimpleDateFormat incomingDateFormat = new SimpleDateFormat("dd/MM/yyyy");   // format used in the raw trade data
+        SimpleDateFormat convertedDateFormat = new SimpleDateFormat("yyyy-MM-dd");  // ISO-style format returned to Hive
+
+        Date parsedate = incomingDateFormat.parse( s.toString() );
+
+        to_value.set( convertedDateFormat.format(parsedate) );
+
+      }
+      catch (Exception e)
+      {
+        to_value = new Text(s);  // on a parse error, return the input value unchanged
+      }
+    }
+    return to_value;
+  }
+}
diff --git a/package/chapter 9/hive/udf/build.sbt b/package/chapter 9/hive/udf/build.sbt
new file mode 100644
index 0000000..e2e4688
--- /dev/null
+++ b/package/chapter 9/hive/udf/build.sbt
@@ -0,0 +1,13 @@
+name := "DateConv"
+
+version := "0.1"
+
+organization := "nz.co.semtechsolutions"
+
+scalaVersion := "2.10.4"
+
+resolvers += "CDH4" at "https://repository.cloudera.com/artifactory/cloudera-repos/"
+
+libraryDependencies += "org.apache.hadoop" % "hadoop-core" % "0.20.2" % "provided"
+
+libraryDependencies += "org.apache.hive" % "hive-exec" % "0.10.0" % "provided"
diff --git a/package/chapter 9/hive/udf/sbt commands.txt b/package/chapter 9/hive/udf/sbt commands.txt
new file mode 100644
index 0000000..5843352
--- /dev/null
+++ b/package/chapter 9/hive/udf/sbt commands.txt
@@ -0,0 +1,40 @@
+
+The commands used with sbt in the book were
+
+compile
+package
+exit
+
+If you want to know the full range of commands available, just use help, i.e.
+
+
+[hadoop@hc1nn udf]$ sbt
+Loading /usr/share/sbt/bin/sbt-launch-lib.bash
+[info] Set current project to DateConv (in build file:/home/hadoop/hive/udf/)
+
+> help
+
+  help          Displays this help message or prints detailed help on requested commands (run 'help <command>').
+  about         Displays basic information about sbt and the build.
+  tasks         Lists the tasks defined for the current project.
+  settings      Lists the settings defined for the current project.
+  reload        (Re)loads the project in the current directory
+  projects      Lists the names of available projects or temporarily adds/removes extra builds to the session.
+  project       Displays the current project or changes to the provided `project`.
+  set [every] <setting-expression>   Evaluates a Setting and applies it to the current project.
+ session Manipulates session settings. For details, run 'help session'. + inspect [uses|tree|definitions] Prints the value for 'key', the defining scope, delegates, related definitions, and dependencies. + Sets the logging level to 'log-level'. Valid levels: debug, info, warn, error + ; (; )* Runs the provided semicolon-separated commands. + ~ Executes the specified command whenever source files change. + last Displays output from a previous command or the output from a specific task. + last-grep Shows lines from the last output for 'key' that match 'pattern'. + export + Executes tasks and displays the equivalent command lines. + exit Terminates the build. + -- Schedules a command to run before other commands on startup. + show Displays the result of evaluating the setting or task associated with 'key'. + +More command help available using 'help ' for: + !, +, ++, <, alias, append, apply, eval, iflast, onFailure, reboot, shell + +> diff --git a/package/chapter 9/impala/Impala QL.txt b/package/chapter 9/impala/Impala QL.txt new file mode 100644 index 0000000..e816b0c --- /dev/null +++ b/package/chapter 9/impala/Impala QL.txt @@ -0,0 +1,138 @@ + +-------------------------------------------------- +-- create database and use it +-------------------------------------------------- + +CREATE DATABASE fuel ; + +USE fuel ; + +-------------------------------------------------- +-- external tables +-------------------------------------------------- + +CREATE EXTERNAL TABLE fuel.consumption +( + myear STRING, + manufacturer STRING, + model STRING, + fclass STRING, + enginesz STRING, + cylinders STRING, + transmission STRING, + fuel STRING, + consumption1 STRING, + consumption2 STRING, + consumption3 STRING, + consumption4 STRING, + avefuel STRING, + co2 STRING +) +ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' +LOCATION '/user/hue2/fuel_consumption/'; + + +-------------------------------------------------- +-- remove a table +-------------------------------------------------- + +DROP TABLE fuel.consumption ; + +-------------------------------------------------- +-- create empty table +-------------------------------------------------- + +CREATE TABLE fuel.consumption2 +( + myear STRING, + manufacturer STRING, + model STRING, + fclass STRING, + enginesz STRING, + cylinders STRING, + transmission STRING, + fuel STRING, + consumption1 STRING, + consumption2 STRING, + consumption3 STRING, + consumption4 STRING, + avefuel STRING, + co2 STRING +); + + +-------------------------------------------------- +-- list tables and show content +-------------------------------------------------- + +SHOW TABLES ; + +DESCRIBE fuel.consumption; + +-------------------------------------------------- +-- select statements +-------------------------------------------------- + +SELECT * FROM fuel.customer ; + +SELECT * from fuel.consumption ; + +SELECT myear, manufacturer from fuel.consumption ; + +-------------------------------------------------- +-- where clauses +-------------------------------------------------- + +SELECT * from fuel.consumption WHERE manufacturer = 'ACURA' ; + +SELECT * from fuel.consumption WHERE myear = '1995' AND manufacturer = 'AUDI' ; + +SELECT * from fuel.consumption WHERE myear = '1995' OR manufacturer = 'AUDI' ; + + +-------------------------------------------------- +-- sub queries +-------------------------------------------------- + +SELECT rd.* FROM +( + SELECT + myear,manufacturer,model,enginesz,cylinders + FROM + fuel.consumption + +) rd ; + + +SELECT + fl.* +FROM + fuel.consumption fl 
+WHERE + fl.enginesz > ( SELECT AVG(st.enginesz) FROM fuel.consumption st ) + +-------------------------------------------------- +-- table joins +-------------------------------------------------- + +SELECT + rd1.* +FROM + fuel.consumption rd1, + fuel.consumption3 rd2 +WHERE + rd1.myear = rd2.myear AND + rd1. manufacturer = rd2. manufacturer AND + rd1. model = rd2. model + +-------------------------------------------------- +-- inserts +-------------------------------------------------- + +INSERT INTO fuel.consumption2 VALUES ('1995','ACURA','INTEGRA','SUBCOMPACT','1.8','4','A4','X','10.2','7','28','40','1760','202'); + +INSERT INTO TABLE fuel.consumption2 SELECT * FROM fuel.consumption3 + + + + diff --git a/package/chapter 9/spark/Spark QL.txt b/package/chapter 9/spark/Spark QL.txt new file mode 100644 index 0000000..1452ea6 --- /dev/null +++ b/package/chapter 9/spark/Spark QL.txt @@ -0,0 +1,66 @@ + +--------------------------------------------------------------------------- +-- start spark shell +--------------------------------------------------------------------------- + +spark-shell --master spark://hc2nn.semtech-solutions.co.nz:7077 + +--------------------------------------------------------------------------- +-- scala script +--------------------------------------------------------------------------- + +val myFile = sc.textFile("/tmp/scala.csv") + +myFile.count() + +myFile.filter(line => line.contains("ACURA")).count() + +--------------------------------------------------------------------------- +-- scala submit +--------------------------------------------------------------------------- + +spark-submit \ + --class org.apache.spark.examples.SparkPi \ + --master spark://hc2nn.semtech-solutions.co.nz:7077 \ + --executor-memory 700M \ + --total-executor-cores 10 \ + /usr/lib/spark/examples/lib/spark-examples_2.10-1.0.0-cdh5.1.2.jar \ + 10000 + +--------------------------------------------------------------------------- +-- scala SQL +--------------------------------------------------------------------------- + +val sqlContext = new org.apache.spark.sql.SQLContext(sc) + +import sqlContext._ + +case class Vehicle(year: Int,manufacturer: String, model: String, vclass: String, engine: Double, cylinders: Int, fuel: String, consumption: String, clkm: Double, hlkm: Double, cmpg: Int, hmpg: Int, co2lyr: Int, co2gkm: Int) + +val vehicle = sc.textFile("/tmp/scala.csv").map(_.split(",")).map(p => Vehicle( +p(0).trim.toInt, +p(1), +p(2), +p(3), +p(4).trim.toDouble, +p(5).trim.toInt, +p(6), +p(7), +p(8).trim.toDouble, +p(9).trim.toDouble, +p(10).trim.toInt, +p(11).trim.toInt, +p(12).trim.toInt, +p(13).trim.toInt +)) + + +vehicle.registerAsTable("vehicle") + +val aston = sql( "SELECT year, manufacturer, model, vclass, engine FROM vehicle WHERE manufacturer = 'ASTON MARTIN' ") + +aston.map( t => "year: " + t(0) + " manufacturer " + t(1) + " model " + t(2) + " class " + t(3) + " engine " + t(4) ).collect().foreach(println) + + + +
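+---------------------------------------------------------------------------
+-- scala SQL aggregate (illustrative sketch)
+---------------------------------------------------------------------------
+
+-- Not from the book text: a follow-on query against the vehicle table
+-- registered above, grouping rows by manufacturer and averaging the
+-- engine size, then printing the results in the same way as the
+-- ASTON MARTIN query. It assumes the Spark SQL version in use supports
+-- AVG and GROUP BY.
+
+val aveEngine = sql( "SELECT manufacturer, AVG(engine) as aveengine FROM vehicle GROUP BY manufacturer" )
+
+aveEngine.map( t => "manufacturer " + t(0) + " average engine " + t(1) ).collect().foreach(println)
+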