From e8ea375da48054943a9697b96eea1971124e6070 Mon Sep 17 00:00:00 2001 From: Shaun Sabo Date: Wed, 9 Nov 2016 10:47:05 -0800 Subject: [PATCH 1/4] fix tool so it works in clean environment --- run-example.sh | 68 +++++++++++++++++++++++++++++++++++++++++-------- stop-example.sh | 4 +-- 2 files changed, 60 insertions(+), 12 deletions(-) diff --git a/run-example.sh b/run-example.sh index 1fb6eba..994a73b 100644 --- a/run-example.sh +++ b/run-example.sh @@ -6,6 +6,9 @@ # Should run on linux/OSX. ####################### +# Enable to see commands run for debugging +#set -x + # this gets hard coded in the configs - keep in sync tmpdir=/tmp/solr-map-reduce @@ -52,7 +55,7 @@ rm -f -r $tmpdir # get hadoop if [ ! -f "$hadoop_distrib.tar.gz" ]; then - curl -o $hadoop_distrib.tar.gz "$hadoop_distrib_url" + curl -o $hadoop_distrib.tar.gz "$hadoop_distrib_url" if [[ $? -ne 0 ]] then echo "Failed to download hadoop at $hadoop_distrib" @@ -102,6 +105,13 @@ else echo "$solr_distrib.tgz already extracted" fi +# tar -zxf +cd $solr_distrib +unzip -o server/webapps/solr.war -d server/solr-webapp/webapp +# Patch map reduce environment for MacOS support. (sed expression shipped with solr only works on linux) +#patch server/scripts/map-reduce/set-map-reduce-classpath.sh < ../solr-map-reduce-classpath.sh.diff +cd .. 
+ ## Harmonize Conflicting Jar Dependencies ####################### @@ -112,7 +122,7 @@ find $hadoop_distrib -name "httpclient-*.jar" -type f -exec rm {} \; find $hadoop_distrib -name "httpcore-*.jar" -type f -exec rm {} \; solr_client=$solr_distrib/server/solr-webapp/webapp/WEB-INF/lib/httpclient-$solr_http_client_version.jar -solr_core=$solr_distrib/server/solr-webapp/webapp/WEB-INF/lib/httpcore-$solr_http_client_version.jar +solr_corer=$solr_distrib/server/solr-webapp/webapp/WEB-INF/lib/httpcore-$solr_http_client_version.jar cp $solr_client $hadoop_distrib/share/hadoop/tools/lib cp $solr_corer $hadoop_distrib/share/hadoop/tools/lib @@ -127,6 +137,9 @@ cp $solr_client $hadoop_distrib/share/hadoop/common/lib cp $solr_corer $hadoop_distrib/share/hadoop/common/lib +# Fix JAVA_HOME evaluation on OSX +#sed -e "s/\(export JAVA_HOME=\)(\(.*\))/\1\2/" -i -f $hadoop_distrib/libexec/hadoop-config.sh + ## Start HDFS+YARN ####################### @@ -196,7 +209,7 @@ rm -r -f server/solr/collection1 rm -f server/server.log # tar -zxf -unzip -o server/webapps/solr.war -d server/solr-webapp/webapp +#unzip -o server/webapps/solr.war -d server/solr-webapp/webapp # fix bad paths in release sed -i -- 's/example/server/g' server/scripts/map-reduce/set-map-reduce-classpath.sh @@ -210,13 +223,37 @@ echo "Upload config files to zookeeper..." 
java -classpath "server/solr-webapp/webapp/WEB-INF/lib/*:server/lib/ext/*" org.apache.solr.cloud.ZkCLI -zkhost 127.0.0.1:9983 -cmd upconfig --confdir server/solr/configsets/basic_configs/conf --confname basic_configs -runzk 8983 -solrhome server/solr +echo "Starting Solr Server 1" cd server -java -DSTOP.PORT=7983 -DSTOP.KEY=key -jar start.jar --module=http --stop -java -Xmx512m -DzkRun -DnumShards=2 -Dsolr.directoryFactory=solr.HdfsDirectoryFactory -Dsolr.lock.type=hdfs -Dsolr.hdfs.home=hdfs://127.0.0.1:8020/solr1 -Dsolr.hdfs.confdir=$hadoop_conf_dir -DSTOP.PORT=7983 -DSTOP.KEY=key -jar start.jar --module=http 1>server.log 2>&1 & - +java -DSTOP.PORT=7983 -DSTOP.KEY=key -jar start.jar --module=http --stop 2>/dev/null +java -Xmx512m \ + -Djetty.port=8983 \ + -DzkRun \ + -DnumShards=2 \ + -Dsolr.directoryFactory=solr.HdfsDirectoryFactory \ + -Dsolr.lock.type=hdfs \ + -Dsolr.hdfs.home=hdfs://127.0.0.1:8020/solr1 \ + -Dsolr.hdfs.confdir=$hadoop_conf_dir \ + -DSTOP.PORT=7983 \ + -DSTOP.KEY=key \ + -jar start.jar \ + --module=http 1>server.log 2>&1 & + +echo "Starting Solr Server 2" cd ../server2 -java -DSTOP.PORT=6574 -DSTOP.KEY=key -jar start.jar --module=http --stop -java -Xmx512m -Djetty.port=7574 -DzkHost=127.0.0.1:9983 -DnumShards=2 -Dsolr.directoryFactory=solr.HdfsDirectoryFactory -Dsolr.lock.type=hdfs -Dsolr.hdfs.home=hdfs://127.0.0.1:8020/solr2 -Dsolr.hdfs.confdir=$hadoop_conf_dir -DSTOP.PORT=6574 -DSTOP.KEY=key -jar start.jar --module=http 1>server2.log 2>&1 & +java -DSTOP.PORT=6574 -DSTOP.KEY=key -jar start.jar --module=http --stop 2>/dev/null +java -Xmx512m \ + -Djetty.port=7574 \ + -DzkHost=127.0.0.1:9983 \ + -DnumShards=2 \ + -Dsolr.directoryFactory=solr.HdfsDirectoryFactory \ + -Dsolr.lock.type=hdfs \ + -Dsolr.hdfs.home=hdfs://127.0.0.1:8020/solr2 \ + -Dsolr.hdfs.confdir=$hadoop_conf_dir \ + -DSTOP.PORT=6574 \ + -DSTOP.KEY=key \ + -jar start.jar \ + --module=http 1>server2.log 2>&1 & # wait for solr to be ready sleep 15 @@ -225,10 +262,21 @@ curl 
"127.0.0.1:8983/solr/admin/collections?action=CREATE&name=collection1&numSh cd ../.. -# +# ## Build an index with map-reduce and deploy it to SolrCloud ####################### +echo "Building solr index with map reduce" source $solr_distrib/server/scripts/map-reduce/set-map-reduce-classpath.sh -$hadoop_distrib/bin/hadoop --config $hadoop_conf_dir jar $solr_distrib/dist/solr-map-reduce-*.jar -D 'mapred.child.java.opts=-Xmx500m' -libjars "$HADOOP_LIBJAR" --morphline-file readAvroContainer.conf --zk-host 127.0.0.1:9983 --output-dir hdfs://127.0.0.1:8020/outdir --collection $collection --log4j log4j.properties --go-live --verbose "hdfs://127.0.0.1:8020/indir" +$hadoop_distrib/bin/hadoop --config $hadoop_conf_dir \ + jar $solr_distrib/dist/solr-map-reduce-*.jar \ + -D 'mapred.child.java.opts=-Xmx500m' \ + -libjars "$HADOOP_LIBJAR" \ + --morphline-file readAvroContainer.conf \ + --zk-host 127.0.0.1:9983 \ + --output-dir hdfs://127.0.0.1:8020/outdir \ + --collection $collection \ + --log4j log4j.properties \ + --go-live \ + --verbose "hdfs://127.0.0.1:8020/indir" diff --git a/stop-example.sh b/stop-example.sh index 446710b..74187cc 100755 --- a/stop-example.sh +++ b/stop-example.sh @@ -21,8 +21,8 @@ hadoop-*/sbin/yarn-daemon.sh --config $hadoop_conf_dir stop nodemanager cd solr* -cd example +cd server java -DSTOP.PORT=7983 -DSTOP.KEY=key -jar start.jar --stop -cd ../example2 +cd ../server2 java -DSTOP.PORT=6574 -DSTOP.KEY=key -jar start.jar --stop From f45df4ac557505fe2ef487cdfa5bd0594a5d6ed1 Mon Sep 17 00:00:00 2001 From: Shaun Sabo Date: Wed, 9 Nov 2016 10:47:23 -0800 Subject: [PATCH 2/4] Add vagrant config --- .gitignore | 1 + Vagrantfile | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 Vagrantfile diff --git a/.gitignore b/.gitignore index a8b57a0..3325e2b 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ /hadoop-* *~ example*.log +.vagrant diff --git a/Vagrantfile b/Vagrantfile new file mode 100644 
index 0000000..5917b0c --- /dev/null +++ b/Vagrantfile @@ -0,0 +1,80 @@ +# -*- mode: ruby -*- +# vi: set ft=ruby : + +# All Vagrant configuration is done below. The "2" in Vagrant.configure +# configures the configuration version (we support older styles for +# backwards compatibility). Please don't change it unless you know what +# you're doing. +Vagrant.configure("2") do |config| + # The most common configuration options are documented and commented below. + # For a complete reference, please see the online documentation at + # https://docs.vagrantup.com. + + # Every Vagrant development environment requires a box. You can search for + # boxes at https://atlas.hashicorp.com/search. + config.vm.box = "ubuntu/trusty64" + + # Disable automatic box update checking. If you disable this, then + # boxes will only be checked for updates when the user runs + # `vagrant box outdated`. This is not recommended. + # config.vm.box_check_update = false + + # Create a forwarded port mapping which allows access to a specific port + # within the machine from a port on the host machine. In the example below, + # accessing "localhost:8080" will access port 80 on the guest machine. + # config.vm.network "forwarded_port", guest: 80, host: 8080 + config.vm.network "forwarded_port", guest: 8983, host: 8983 + config.vm.network "forwarded_port", guest: 7574, host: 7574 + config.vm.network "forwarded_port", guest: 8088, host: 8088 + config.vm.network "forwarded_port", guest: 8042, host: 8042 + + # Create a private network, which allows host-only access to the machine + # using a specific IP. + # config.vm.network "private_network", ip: "192.168.33.10" + + # Create a public network, which generally matched to bridged network. + # Bridged networks make the machine appear as another physical device on + # your network. + # config.vm.network "public_network" + + # Share an additional folder to the guest VM. The first argument is + # the path on the host to the actual folder. 
The second argument is + # the path on the guest to mount the folder. And the optional third + # argument is a set of non-required options. + # config.vm.synced_folder "../data", "/vagrant_data" + + # Provider-specific configuration so you can fine-tune various + # backing providers for Vagrant. These expose provider-specific options. + # Example for VirtualBox: + # + config.vm.provider "virtualbox" do |vb| + # Display the VirtualBox GUI when booting the machine + # vb.gui = true + + # Customize the amount of memory on the VM: + vb.memory = "4096" + vb.cpus = 4 + end + # + # View the documentation for the provider you are using for more + # information on available options. + + # Define a Vagrant Push strategy for pushing to Atlas. Other push strategies + # such as FTP and Heroku are also available. See the documentation at + # https://docs.vagrantup.com/v2/push/atlas.html for more information. + # config.push.define "atlas" do |push| + # push.app = "YOUR_ATLAS_USERNAME/YOUR_APPLICATION_NAME" + # end + + # Enable provisioning with a shell script. Additional provisioners such as + # Puppet, Chef, Ansible, Salt, and Docker are also available. Please see the + # documentation for more information about their specific syntax and use. 
+ # config.vm.provision "shell", inline: <<-SHELL + # apt-get update + # apt-get install -y apache2 + # SHELL + config.vm.provision "shell", inline: <<-SHELL + apt-get install -y unzip openjdk-7-jdk + echo "JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64/" >> /etc/environment + SHELL +end From abc36d48d29dc9c5167d0edfcdc920fbc3f5b6a0 Mon Sep 17 00:00:00 2001 From: Shaun Sabo Date: Wed, 9 Nov 2016 10:51:31 -0800 Subject: [PATCH 3/4] remove solr unzip line that was moved up in script --- run-example.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/run-example.sh b/run-example.sh index 994a73b..f01e7f0 100644 --- a/run-example.sh +++ b/run-example.sh @@ -208,9 +208,6 @@ rm -r -f server/solr/zoo_data rm -r -f server/solr/collection1 rm -f server/server.log -# tar -zxf -#unzip -o server/webapps/solr.war -d server/solr-webapp/webapp - # fix bad paths in release sed -i -- 's/example/server/g' server/scripts/map-reduce/set-map-reduce-classpath.sh From c6f33a30472366afe8eb558b0a5f36b23ccb5b9c Mon Sep 17 00:00:00 2001 From: Shaun Sabo Date: Wed, 9 Nov 2016 10:58:03 -0800 Subject: [PATCH 4/4] pull tarballs from apache archives: the solr tarball was no longer available from the location we were trying to pull from, since it was not the latest version --- run-example.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/run-example.sh b/run-example.sh index f01e7f0..50a6c97 100644 --- a/run-example.sh +++ b/run-example.sh @@ -17,11 +17,11 @@ tmpdir=/tmp/solr-map-reduce solr_ver="5.2.1" solr_distrib="solr-$solr_ver" -solr_distrib_url="http://apache.mirrors.hoobly.com/lucene/solr/$solr_ver/$solr_distrib.tgz" +solr_distrib_url="http://archive.apache.org/dist/lucene/solr/$solr_ver/$solr_distrib.tgz" # you should replace with a local mirror. 
Find one at http://www.apache.org/dyn/closer.cgi/hadoop/common/hadoop-2.2.0/ hadoop_distrib="hadoop-2.6.0" -hadoop_distrib_url="http://www.trieuvan.com/apache/hadoop/common/$hadoop_distrib/$hadoop_distrib.tar.gz" +hadoop_distrib_url="http://archive.apache.org/dist/hadoop/common/$hadoop_distrib/$hadoop_distrib.tar.gz" ######################################################### # NameNode port: 8020, DataNode ports: 50010, 50020, ResourceManager port: 8032 ZooKeeper port: 9983, Solr port: 8983