Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
/hadoop-*
*~
example*.log
.vagrant
80 changes: 80 additions & 0 deletions Vagrantfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# -*- mode: ruby -*-
# vi: set ft=ruby :

# Vagrant configuration for the solr-map-reduce example environment.
# The "2" passed to Vagrant.configure selects configuration format v2;
# do not change it unless you know what you are doing.
# Full reference: https://docs.vagrantup.com.
Vagrant.configure("2") do |config|
  # Base box for the guest VM. Search for boxes at
  # https://atlas.hashicorp.com/search.
  config.vm.box = "ubuntu/trusty64"

  # Box update checking stays at the default (enabled); uncomment to
  # only check when `vagrant box outdated` is run (not recommended).
  # config.vm.box_check_update = false

  # Forward the ports the example needs, guest -> host one-to-one:
  #   8983 / 7574 : the two Solr servers
  #   8088        : YARN ResourceManager web UI
  #   8042        : YARN NodeManager web UI
  [8983, 7574, 8088, 8042].each do |port|
    config.vm.network "forwarded_port", guest: port, host: port
  end

  # Host-only private network with a fixed IP (disabled).
  # config.vm.network "private_network", ip: "192.168.33.10"

  # Bridged public network, making the VM appear as another physical
  # device on the LAN (disabled).
  # config.vm.network "public_network"

  # Extra synced folder: host path first, guest mount point second,
  # optional options third (disabled).
  # config.vm.synced_folder "../data", "/vagrant_data"

  # Provider-specific tuning for VirtualBox. See the VirtualBox
  # provider documentation for all available options.
  config.vm.provider "virtualbox" do |vb|
    # Uncomment to boot with the VirtualBox GUI visible.
    # vb.gui = true

    # Hadoop + two Solr JVMs need headroom: 4 GB RAM, 4 vCPUs.
    vb.memory = "4096"
    vb.cpus = 4
  end

  # Atlas push strategy (disabled). Other strategies such as FTP and
  # Heroku exist; see https://docs.vagrantup.com/v2/push/atlas.html.
  # config.push.define "atlas" do |push|
  #   push.app = "YOUR_ATLAS_USERNAME/YOUR_APPLICATION_NAME"
  # end

  # Shell provisioning. Additional provisioners (Puppet, Chef, Ansible,
  # Salt, Docker) are also available; see their documentation for syntax.
  # config.vm.provision "shell", inline: <<-SHELL
  #   apt-get update
  #   apt-get install -y apache2
  # SHELL
  #
  # Install the tools the example scripts require (unzip for the solr
  # war, JDK 7 for hadoop/solr) and export JAVA_HOME system-wide.
  config.vm.provision "shell", inline: <<-SHELL
    apt-get install -y unzip openjdk-7-jdk
    echo "JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64/" >> /etc/environment
  SHELL
end
73 changes: 59 additions & 14 deletions run-example.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
# Should run on linux/OSX.
#######################

# Enable to see commands run for debugging
#set -x

# this gets hard coded in the configs - keep in sync
tmpdir=/tmp/solr-map-reduce

Expand All @@ -14,11 +17,11 @@ tmpdir=/tmp/solr-map-reduce

solr_ver="5.2.1"
solr_distrib="solr-$solr_ver"
solr_distrib_url="http://apache.mirrors.hoobly.com/lucene/solr/$solr_ver/$solr_distrib.tgz"
solr_distrib_url="http://archive.apache.org/dist/lucene/solr/$solr_ver/$solr_distrib.tgz"

# you should replace with a local mirror. Find one at http://www.apache.org/dyn/closer.cgi/hadoop/common/hadoop-2.6.0/
hadoop_distrib="hadoop-2.6.0"
hadoop_distrib_url="http://www.trieuvan.com/apache/hadoop/common/$hadoop_distrib/$hadoop_distrib.tar.gz"
hadoop_distrib_url="http://archive.apache.org/dist/hadoop/common/$hadoop_distrib/$hadoop_distrib.tar.gz"

#########################################################
# NameNode port: 8020, DataNode ports: 50010, 50020, ResourceManager port: 8032 ZooKeeper port: 9983, Solr port: 8983
Expand Down Expand Up @@ -52,7 +55,7 @@ rm -f -r $tmpdir

# get hadoop
if [ ! -f "$hadoop_distrib.tar.gz" ]; then
curl -o $hadoop_distrib.tar.gz "$hadoop_distrib_url"
curl -o $hadoop_distrib.tar.gz "$hadoop_distrib_url"
if [[ $? -ne 0 ]]
then
echo "Failed to download hadoop at $hadoop_distrib"
Expand Down Expand Up @@ -102,6 +105,13 @@ else
echo "$solr_distrib.tgz already extracted"
fi

# extract the bundled solr webapp from the war so its jars are available
cd $solr_distrib
unzip -o server/webapps/solr.war -d server/solr-webapp/webapp
# Patch map reduce environment for MacOS support. (sed expression shipped with solr only works on linux)
#patch server/scripts/map-reduce/set-map-reduce-classpath.sh < ../solr-map-reduce-classpath.sh.diff
cd ..

## Harmonize Conflicting Jar Dependencies
#######################

Expand All @@ -112,7 +122,7 @@ find $hadoop_distrib -name "httpclient-*.jar" -type f -exec rm {} \;
find $hadoop_distrib -name "httpcore-*.jar" -type f -exec rm {} \;

solr_client=$solr_distrib/server/solr-webapp/webapp/WEB-INF/lib/httpclient-$solr_http_client_version.jar
solr_core=$solr_distrib/server/solr-webapp/webapp/WEB-INF/lib/httpcore-$solr_http_client_version.jar
solr_corer=$solr_distrib/server/solr-webapp/webapp/WEB-INF/lib/httpcore-$solr_http_client_version.jar

cp $solr_client $hadoop_distrib/share/hadoop/tools/lib
cp $solr_corer $hadoop_distrib/share/hadoop/tools/lib
Expand All @@ -127,6 +137,9 @@ cp $solr_client $hadoop_distrib/share/hadoop/common/lib
cp $solr_corer $hadoop_distrib/share/hadoop/common/lib


# Fix JAVA_HOME evaluation on OSX
#sed -e "s/\(export JAVA_HOME=\)(\(.*\))/\1\2/" -i -f $hadoop_distrib/libexec/hadoop-config.sh

## Start HDFS+YARN
#######################

Expand Down Expand Up @@ -195,9 +208,6 @@ rm -r -f server/solr/zoo_data
rm -r -f server/solr/collection1
rm -f server/server.log

# tar -zxf
unzip -o server/webapps/solr.war -d server/solr-webapp/webapp

# fix bad paths in release
sed -i -- 's/example/server/g' server/scripts/map-reduce/set-map-reduce-classpath.sh

Expand All @@ -210,13 +220,37 @@ echo "Upload config files to zookeeper..."
java -classpath "server/solr-webapp/webapp/WEB-INF/lib/*:server/lib/ext/*" org.apache.solr.cloud.ZkCLI -zkhost 127.0.0.1:9983 -cmd upconfig --confdir server/solr/configsets/basic_configs/conf --confname basic_configs -runzk 8983 -solrhome server/solr


echo "Starting Solr Server 1"
cd server
java -DSTOP.PORT=7983 -DSTOP.KEY=key -jar start.jar --module=http --stop
java -Xmx512m -DzkRun -DnumShards=2 -Dsolr.directoryFactory=solr.HdfsDirectoryFactory -Dsolr.lock.type=hdfs -Dsolr.hdfs.home=hdfs://127.0.0.1:8020/solr1 -Dsolr.hdfs.confdir=$hadoop_conf_dir -DSTOP.PORT=7983 -DSTOP.KEY=key -jar start.jar --module=http 1>server.log 2>&1 &

java -DSTOP.PORT=7983 -DSTOP.KEY=key -jar start.jar --module=http --stop 2>/dev/null
java -Xmx512m \
-Djetty.port=8983 \
-DzkRun \
-DnumShards=2 \
-Dsolr.directoryFactory=solr.HdfsDirectoryFactory \
-Dsolr.lock.type=hdfs \
-Dsolr.hdfs.home=hdfs://127.0.0.1:8020/solr1 \
-Dsolr.hdfs.confdir=$hadoop_conf_dir \
-DSTOP.PORT=7983 \
-DSTOP.KEY=key \
-jar start.jar \
--module=http 1>server.log 2>&1 &

echo "Starting Solr Server 2"
cd ../server2
java -DSTOP.PORT=6574 -DSTOP.KEY=key -jar start.jar --module=http --stop
java -Xmx512m -Djetty.port=7574 -DzkHost=127.0.0.1:9983 -DnumShards=2 -Dsolr.directoryFactory=solr.HdfsDirectoryFactory -Dsolr.lock.type=hdfs -Dsolr.hdfs.home=hdfs://127.0.0.1:8020/solr2 -Dsolr.hdfs.confdir=$hadoop_conf_dir -DSTOP.PORT=6574 -DSTOP.KEY=key -jar start.jar --module=http 1>server2.log 2>&1 &
java -DSTOP.PORT=6574 -DSTOP.KEY=key -jar start.jar --module=http --stop 2>/dev/null
java -Xmx512m \
-Djetty.port=7574 \
-DzkHost=127.0.0.1:9983 \
-DnumShards=2 \
-Dsolr.directoryFactory=solr.HdfsDirectoryFactory \
-Dsolr.lock.type=hdfs \
-Dsolr.hdfs.home=hdfs://127.0.0.1:8020/solr2 \
-Dsolr.hdfs.confdir=$hadoop_conf_dir \
-DSTOP.PORT=6574 \
-DSTOP.KEY=key \
-jar start.jar \
--module=http 1>server2.log 2>&1 &

# wait for solr to be ready
sleep 15
Expand All @@ -225,10 +259,21 @@ curl "127.0.0.1:8983/solr/admin/collections?action=CREATE&name=collection1&numSh

cd ../..

#
#
## Build an index with map-reduce and deploy it to SolrCloud
#######################
echo "Building solr index with map reduce"

source $solr_distrib/server/scripts/map-reduce/set-map-reduce-classpath.sh

$hadoop_distrib/bin/hadoop --config $hadoop_conf_dir jar $solr_distrib/dist/solr-map-reduce-*.jar -D 'mapred.child.java.opts=-Xmx500m' -libjars "$HADOOP_LIBJAR" --morphline-file readAvroContainer.conf --zk-host 127.0.0.1:9983 --output-dir hdfs://127.0.0.1:8020/outdir --collection $collection --log4j log4j.properties --go-live --verbose "hdfs://127.0.0.1:8020/indir"
$hadoop_distrib/bin/hadoop --config $hadoop_conf_dir \
jar $solr_distrib/dist/solr-map-reduce-*.jar \
-D 'mapred.child.java.opts=-Xmx500m' \
-libjars "$HADOOP_LIBJAR" \
--morphline-file readAvroContainer.conf \
--zk-host 127.0.0.1:9983 \
--output-dir hdfs://127.0.0.1:8020/outdir \
--collection $collection \
--log4j log4j.properties \
--go-live \
--verbose "hdfs://127.0.0.1:8020/indir"
4 changes: 2 additions & 2 deletions stop-example.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ hadoop-*/sbin/yarn-daemon.sh --config $hadoop_conf_dir stop nodemanager

cd solr*

cd example
cd server
java -DSTOP.PORT=7983 -DSTOP.KEY=key -jar start.jar --stop

cd ../example2
cd ../server2
java -DSTOP.PORT=6574 -DSTOP.KEY=key -jar start.jar --stop