Skip to content

Commit df29d0e

Browse files
committed
Initial commit
0 parents  commit df29d0e

File tree

315 files changed

+354873
-0
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

315 files changed

+354873
-0
lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
*~
2+
*.swp
3+
build
4+
work

Makefile

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
EMPTY :=
SPACE := $(EMPTY) $(EMPTY)

# Build up the Java classpath by joining the jar list with ':'.
JARS := third_party/nexus.jar
JARS += third_party/asm-3.2/lib/all/asm-all-3.2.jar
JARS += third_party/colt.jar
JARS += third_party/google-collect-1.0-rc5/google-collect-1.0-rc5.jar
JARS += third_party/hadoop-0.20.0/hadoop-0.20.0-core.jar
JARS += third_party/hadoop-0.20.0/lib/commons-logging-1.0.4.jar
JARS += third_party/scalatest-1.0/scalatest-1.0.jar
JARS += third_party/ScalaCheck-1.5.jar
CLASSPATH := $(subst $(SPACE),:,$(JARS))

# Globs are expanded by the shell when the recipe runs, not by Make.
SCALA_SOURCES := src/examples/*.scala src/scala/spark/*.scala src/scala/spark/repl/*.scala
SCALA_SOURCES += src/test/spark/*.scala src/test/spark/repl/*.scala

JAVA_SOURCES := $(wildcard src/java/spark/compress/lzf/*.java)

# Set USE_FSC=1 to compile through the fsc daemon (much faster on rebuilds).
ifeq ($(USE_FSC),1)
  COMPILER_NAME := fsc
else
  COMPILER_NAME := scalac
endif

# Honour SCALA_HOME if set; otherwise rely on the compiler being on PATH.
ifeq ($(SCALA_HOME),)
  COMPILER := $(COMPILER_NAME)
else
  COMPILER := $(SCALA_HOME)/bin/$(COMPILER_NAME)
endif

all: scala java

build/classes:
	mkdir -p build/classes

# Scala sources reference the compiled Java LZF classes, hence the java prereq.
scala: build/classes java
	$(COMPILER) -unchecked -d build/classes -classpath $(CLASSPATH) $(SCALA_SOURCES)

java: $(JAVA_SOURCES) build/classes
	javac -d build/classes $(JAVA_SOURCES)

native: java
	$(MAKE) -C src/native

jar: build/spark.jar build/spark-dep.jar

build/spark.jar: scala java
	jar cf build/spark.jar -C build/classes spark

# Bundle all third-party jars into a single dependency jar by unpacking
# each one into build/dep and re-jarring the result.
build/spark-dep.jar:
	mkdir -p build/dep
	cd build/dep && for i in $(JARS); do jar xf ../../$$i; done
	jar cf build/spark-dep.jar -C build/dep .

test: all
	./alltests

default: all

clean:
	$(MAKE) -C src/native clean
	rm -rf build

# Must be .PHONY (upper case): the lowercase ".phony" is an ordinary target
# name with no special meaning, so e.g. a file named "clean" would have
# silently broken `make clean`. "test" is phony too.
.PHONY: default all clean scala java native jar test

README

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
Spark requires Scala 2.7.7. It will currently not work with 2.8, or with
2+
earlier versions of the 2.7 branch.
3+
4+
To build and run Spark, you will need to have Scala's bin in your $PATH,
5+
or you will need to set the SCALA_HOME environment variable to point
6+
to where you've installed Scala. Scala must be accessible through one
7+
of these methods on Nexus slave nodes as well as on the master.
8+
9+
To build Spark and the example programs, run make.
10+
11+
To run one of the examples, use ./run <class> <params>. For example,
12+
./run SparkLR will run the Logistic Regression example. Each of the
13+
example programs prints usage help if no params are given.
14+
15+
Tip: If you are building Spark and examples repeatedly, export USE_FSC=1
16+
to have the Makefile use the fsc compiler daemon instead of scalac.

alltests

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
#!/bin/bash
# Run the full ScalaTest suite against the compiled classes.
# Any arguments are passed through to the ScalaTest Runner.
FWDIR="`dirname "$0"`"
# -p: runpath to scan for Suites; -o: report results on standard output.
# "$@" (quoted) preserves argument word boundaries; bare $@ would re-split
# any argument containing whitespace.
"$FWDIR/run" org.scalatest.tools.Runner -p "$FWDIR/build/classes" -o "$@"

lr_data.txt

Lines changed: 1000 additions & 0 deletions
Large diffs are not rendered by default.

run

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
#!/bin/bash
# Launcher: sets up JAVA_OPTS and the classpath, then runs a class via scala.

# Figure out where the Scala framework is installed
FWDIR="`dirname "$0"`"

# Set JAVA_OPTS to be able to load libnexus.so and set various other misc options
JAVA_OPTS="-Djava.library.path=$FWDIR/third_party:$FWDIR/src/native -Xmx750m"
# Optional per-site extra JVM options.
if [ -e "$FWDIR/conf/java-opts" ] ; then
  JAVA_OPTS+=" `cat "$FWDIR/conf/java-opts"`"
fi
export JAVA_OPTS

# Build up classpath. Every entry is anchored at $FWDIR so the script works
# no matter which directory it is invoked from. (The scalatest and ScalaCheck
# entries were previously missing the $FWDIR/ prefix, breaking out-of-tree
# invocation.)
CLASSPATH=$FWDIR/build/classes
CLASSPATH+=:$FWDIR/third_party/nexus.jar
CLASSPATH+=:$FWDIR/third_party/asm-3.2/lib/all/asm-all-3.2.jar
CLASSPATH+=:$FWDIR/third_party/colt.jar
CLASSPATH+=:$FWDIR/third_party/google-collect-1.0-rc5/google-collect-1.0-rc5.jar
CLASSPATH+=:$FWDIR/third_party/hadoop-0.20.0/hadoop-0.20.0-core.jar
CLASSPATH+=:$FWDIR/third_party/scalatest-1.0/scalatest-1.0.jar
CLASSPATH+=:$FWDIR/third_party/ScalaCheck-1.5.jar
for jar in "$FWDIR"/third_party/hadoop-0.20.0/lib/*.jar; do
  CLASSPATH+=:$jar
done
export CLASSPATH

# Honour SCALA_HOME if set; otherwise expect scala on PATH.
if [ -n "$SCALA_HOME" ]; then
  SCALA=${SCALA_HOME}/bin/scala
else
  SCALA=scala
fi

# "$@" (quoted) preserves each argument as a single word.
exec "$SCALA" -cp "$CLASSPATH" "$@"

spark-executor

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
#!/bin/sh
# Entry point used by the cluster manager to start a Spark executor process.
echo "In spark-executor"
FWDIR="`dirname "$0"`"
echo "Framework dir: $FWDIR"
# Quote the path so installations under directories with spaces still work.
exec "$FWDIR/run" spark.Executor

spark-shell

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
#!/bin/sh
# Launch the interactive Spark REPL, forwarding all arguments.
FWDIR="`dirname "$0"`"
# "$@" (quoted) keeps each user argument intact; bare $@ would word-split.
exec "$FWDIR/run" spark.repl.Main "$@"

src/examples/CpuHog.scala

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
import spark._

/**
 * Stress-test example: schedules <tasks> cluster tasks, each of which keeps
 * <threads_per_task> threads spinning in busy loops forever.
 */
object CpuHog {
  def main(args: Array[String]) {
    if (args.length != 3) {
      System.err.println("Usage: CpuHog <master> <tasks> <threads_per_task>");
      System.exit(1)
    }
    val sc = new SparkContext(args(0), "CPU hog")
    val numTasks = args(1).toInt
    val threadsPerTask = args(2).toInt
    // Body run by each task: start threadsPerTask-1 extra busy threads,
    // then spin on the task's own thread as the final one.
    def spin {
      var n = 0
      while (n < threadsPerTask - 1) {
        new Thread() {
          override def run {
            while (true) {}
          }
        }.start()
        n += 1
      }
      while (true) {}
    }
    sc.runTasks(Array.make(numTasks, () => spin))
  }
}

src/examples/HdfsTest.scala

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
import spark._

/**
 * Simple read benchmark: builds a cached RDD of line lengths from an HDFS
 * file, then scans it ten times, printing the wall-clock time per pass.
 */
object HdfsTest {
  def main(args: Array[String]) {
    val sc = new SparkContext(args(0), "HdfsTest")
    val file = sc.textFile(args(1))
    val lengths = file.map(s => s.length).cache()
    (1 to 10).foreach { iter =>
      val start = System.currentTimeMillis()
      // Touch every element; the trivial computation keeps the scan honest.
      lengths.foreach(x => x + 2)
      val end = System.currentTimeMillis()
      println("Iteration " + iter + " took " + (end-start) + " ms")
    }
  }
}

0 commit comments

Comments
 (0)