Skip to content

Commit 80091bd

Browse files
committed
Merge branch 'release-0.6.0'
2 parents 03adb5f + 44d7a51 commit 80091bd

File tree

135 files changed

+5825
-881
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

135 files changed

+5825
-881
lines changed

bin/build.sh

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,14 +72,24 @@ else
7272
$SBT $CLEAN update assembly
7373

7474
# Build process itemrec Top-k Items Collector
75-
echo "Going to build PredictionIO Top-k Items Collector Assembly..."
75+
echo "Going to build PredictionIO ItemRec Top-k Items Collector Assembly..."
7676
cd $BASE/process/engines/itemrec/evaluations/scala/topkitems
7777
$SBT $CLEAN update assembly
7878

7979
# Build process itemsim algo assembly
80-
echo "Going to build PredictionIO Process ItemRec Algorithms Assembly..."
80+
echo "Going to build PredictionIO Process ItemSim Algorithms Assembly..."
8181
cd $BASE/process/engines/itemsim/algorithms/hadoop/scalding
8282
$SBT $CLEAN update assembly
83+
84+
# Build process itemsim eval assembly
85+
echo "Going to build PredictionIO Process ItemSim Evaluations Assembly..."
86+
cd $BASE/process/engines/itemsim/evaluations/hadoop/scalding
87+
$SBT $CLEAN update assembly
88+
89+
# Build process itemsim Top-k Items Collector
90+
echo "Going to build PredictionIO ItemSim Top-k Items Collector Assembly..."
91+
cd $BASE/process/engines/itemsim/evaluations/scala/topkitems
92+
$SBT $CLEAN update assembly
8393
fi
8494

8595
# Build connection check tool

bin/change-version.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@ def change(filename, oldversion, newversion):
4444
'process/engines/itemrec/evaluations/hadoop/scalding/trainingtestsplit/build.sbt',
4545
'process/engines/itemrec/evaluations/scala/*/build.sbt',
4646
'process/engines/itemsim/algorithms/hadoop/scalding/build.sbt',
47-
'process/engines/itemsim/algorithms/hadoop/scalding/itemsimcf/build.sbt',
47+
'process/engines/itemsim/evaluations/hadoop/scalding/build.sbt',
48+
'process/engines/itemsim/evaluations/scala/*/build.sbt',
4849
'servers/*/project/Build.scala',
4950
'servers/scheduler/conf/application.conf',
5051
'tools/*/build.sbt',

bin/common.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
# This script should be sourced with $BASE set to the base of the repository
44

5-
VERSION=0.5.2
5+
VERSION=0.6.0
66

77
# Play framework related
88
PLAY_OPTS=

bin/package.sh

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,19 +47,24 @@ cp -R $DIST_DIR/bin $PACKAGE_DIR
4747
cp $BASE/bin/quiet.sh $PACKAGE_DIR/bin
4848
cp -R $DIST_DIR/conf $PACKAGE_DIR
4949

50-
cp "$BASE/process/engines/itemrec/algorithms/hadoop/scalding/target/scala-2.9.2/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-$VERSION.jar" "$PACKAGE_DIR/lib"
50+
cp "$BASE/process/engines/itemrec/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-$VERSION.jar" "$PACKAGE_DIR/lib"
5151
cp "$BASE/process/engines/itemrec/algorithms/scala/mahout/target/scala-2.10/PredictionIO-Process-ItemRec-Algorithms-Scala-Mahout-assembly-$VERSION.jar" "$PACKAGE_DIR/lib"
52-
cp "$BASE/process/engines/itemrec/evaluations/hadoop/scalding/target/scala-2.9.2/PredictionIO-Process-ItemRec-Evaluations-Hadoop-Scalding-assembly-$VERSION.jar" "$PACKAGE_DIR/lib"
52+
cp "$BASE/process/engines/itemrec/evaluations/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemRec-Evaluations-Hadoop-Scalding-assembly-$VERSION.jar" "$PACKAGE_DIR/lib"
5353
cp "$BASE/process/engines/itemrec/evaluations/scala/topkitems/target/scala-2.10/PredictionIO-Process-ItemRec-Evaluations-TopKItems-assembly-$VERSION.jar" "$PACKAGE_DIR/lib"
54-
cp "$BASE/process/engines/itemrec/evaluations/scala/trainingtestsplit/target/scala-2.9.2/PredictionIO-Process-ItemRec-Evaluations-Scala-TrainingTestSplitTime-assembly-$VERSION.jar" "$PACKAGE_DIR/lib"
54+
cp "$BASE/process/engines/itemrec/evaluations/scala/trainingtestsplit/target/scala-2.10/PredictionIO-Process-ItemRec-Evaluations-Scala-TrainingTestSplitTime-assembly-$VERSION.jar" "$PACKAGE_DIR/lib"
5555
cp "$BASE/process/engines/itemrec/evaluations/scala/paramgen/target/scala-2.10/PredictionIO-Process-ItemRec-Evaluations-ParamGen-assembly-$VERSION.jar" "$PACKAGE_DIR/lib"
56-
cp "$BASE/process/engines/itemsim/algorithms/hadoop/scalding/target/scala-2.9.2/PredictionIO-Process-ItemSim-Algorithms-Hadoop-Scalding-assembly-$VERSION.jar" "$PACKAGE_DIR/lib"
56+
cp "$BASE/process/engines/itemsim/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemSim-Algorithms-Hadoop-Scalding-assembly-$VERSION.jar" "$PACKAGE_DIR/lib"
57+
cp "$BASE/process/engines/itemsim/evaluations/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemSim-Evaluations-Hadoop-Scalding-assembly-$VERSION.jar" "$PACKAGE_DIR/lib"
58+
cp "$BASE/process/engines/itemsim/evaluations/scala/topkitems/target/scala-2.10/PredictionIO-Process-ItemSim-Evaluations-TopKItems-assembly-$VERSION.jar" "$PACKAGE_DIR/lib"
5759
cp -n $BASE/tools/conncheck/target/pack/lib/* $PACKAGE_DIR/lib
5860
cp -n $BASE/tools/migration/0.5/appdata/target/pack/lib/* $PACKAGE_DIR/lib
5961
cp -n $BASE/tools/settingsinit/target/pack/lib/* $PACKAGE_DIR/lib
6062
cp -n $BASE/tools/softwaremanager/target/pack/lib/* $PACKAGE_DIR/lib
6163
cp -n $BASE/tools/users/target/pack/lib/* $PACKAGE_DIR/lib
6264

65+
mkdir -p $PACKAGE_DIR/vendors/mahout-distribution-0.8
66+
cp $VENDOR_MAHOUT/mahout-core-0.8-job.jar $PACKAGE_DIR/vendors/mahout-distribution-0.8
67+
6368
cd $DIST_DIR/target
6469
rm "$PACKAGE_NAME.zip"
6570
zip -q -r "$PACKAGE_NAME.zip" "$PACKAGE_NAME"

bin/vendors.sh

Lines changed: 13 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -27,29 +27,22 @@ install_play () {
2727
}
2828

2929
install_mahout () {
30-
echo "Going to download and build Apache Mahout 0.8 Build 1993..."
31-
mkdir -p $LIB_MAHOUT
32-
cd $LIB_MAHOUT
33-
if [ ! -f mahout-core-0.8-SNAPSHOT.jar -o ! -f mahout-math-0.8-SNAPSHOT.jar ] ; then
34-
rm -rf $LIB_MAHOUT
35-
mkdir -p $LIB_MAHOUT
36-
cd $LIB_MAHOUT
37-
curl -o mahout-core-0.8-SNAPSHOT.jar http://download.prediction.io/mahout-snapshots/1993/mahout-core-0.8-SNAPSHOT.jar
38-
curl -o mahout-math-0.8-SNAPSHOT.jar http://download.prediction.io/mahout-snapshots/1993/mahout-math-0.8-SNAPSHOT.jar
39-
fi
40-
mkdir -p $VENDOR_MAHOUT
41-
cd $VENDOR_MAHOUT
42-
if [ ! -f mahout-core-0.8-SNAPSHOT-job.jar ] ; then
43-
curl -o mahout-core-0.8-SNAPSHOT-job.jar http://download.prediction.io/mahout-snapshots/1993/mahout-core-0.8-SNAPSHOT-job.jar
44-
fi
30+
echo "Going to download and install Apache Mahout 0.8..."
31+
mkdir -p $VENDORS_PATH
32+
cd $VENDORS_PATH
33+
echo "Retrieving Apache mirror list..."
34+
curl -o apache_mahout_mirrors.txt http://www.apache.org/dyn/closer.cgi/mahout/0.8/mahout-distribution-0.8.tar.gz
35+
MAHOUT_URL=$(cat apache_mahout_mirrors.txt | grep -m 1 "<strong>.*</strong>" | sed 's/.*<strong>//' | sed 's/<\/strong>.*//')
36+
echo "Found mirror: $MAHOUT_URL"
37+
curl -O $MAHOUT_URL
38+
tar zxvf mahout-distribution-0.8.tar.gz
4539
}
4640

4741
# Third party software
4842
VENDORS_PATH="$BASE/vendors"
4943
VENDOR_SBT="$VENDORS_PATH/sbt-0.12.3/sbt"
5044
VENDOR_PLAY="$VENDORS_PATH/play-2.1.1/play"
51-
VENDOR_MAHOUT="$VENDORS_PATH/mahout-0.8-snapshot"
52-
LIB_MAHOUT="$BASE/process/engines/itemrec/algorithms/scala/mahout/commons/lib"
45+
VENDOR_MAHOUT="$VENDORS_PATH/mahout-distribution-0.8"
5346

5447
# Detect existing installations in search path
5548
# Do not use existing sbt to enforce JVM settings
@@ -79,11 +72,11 @@ else
7972
exit 1
8073
fi
8174

82-
if [ -r "$LIB_MAHOUT/mahout-core-0.8-SNAPSHOT.jar" -a -r "$LIB_MAHOUT/mahout-math-0.8-SNAPSHOT.jar" -a -r "$VENDOR_MAHOUT/mahout-core-0.8-SNAPSHOT-job.jar" ] ; then
83-
echo "Using Apache Mahout 0.8 Build 1993 in vendors."
75+
if [ -r "$VENDOR_MAHOUT/mahout-core-0.8-job.jar" ] ; then
76+
echo "Using Apache Mahout 0.8 in vendors."
8477
elif install_mahout ; then
8578
echo ""
8679
else
87-
echo "Unable to locate Apache Mahout 0.8 Build 1993 and automatic installation failed. Aborting." >&2
80+
echo "Unable to locate Apache Mahout 0.8 and automatic installation failed. Aborting." >&2
8881
exit 1
8982
fi

commons/build.sbt

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,19 @@
11
name := "PredictionIO Commons"
22

3-
version := "0.5.2"
3+
version := "0.6.0"
44

55
organization := "io.prediction"
66

7-
scalaVersion := "2.10.0"
8-
9-
crossScalaVersions := Seq("2.9.2", "2.10.0")
7+
scalaVersion := "2.10.2"
108

119
scalacOptions in (Compile, doc) ++= Opts.doc.title("PredictionIO Commons API Documentation")
1210

1311
libraryDependencies ++= Seq(
14-
"com.github.nscala-time" %% "nscala-time" % "0.2.0",
15-
"com.twitter" %% "chill" % "0.2.2",
16-
"com.typesafe" % "config" % "1.0.0",
12+
"com.github.nscala-time" %% "nscala-time" % "0.4.2",
13+
"com.twitter" %% "chill" % "0.2.3",
14+
"com.typesafe" % "config" % "1.0.2",
1715
"org.mongodb" %% "casbah" % "2.6.2",
18-
"org.specs2" %% "specs2" % "1.12.3" % "test"
16+
"org.specs2" %% "specs2" % "1.14" % "test"
1917
)
2018

2119
publishTo := Some(Resolver.file("file", new File(Path.userHome.absolutePath+"/.m2/repository")))

commons/src/main/scala/io/prediction/commons/Config.scala

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -630,6 +630,16 @@ class Config {
630630
}
631631
}
632632

633+
/** Obtains an ItemSimScores object with configured backend type. */
634+
def getModeldataItemSimScores(): modeldata.ItemSimScores = {
635+
modeldataDbType match {
636+
case "mongodb" => {
637+
new modeldata.mongodb.MongoItemSimScores(modeldataMongoDb.get)
638+
}
639+
case _ => throw new RuntimeException("Invalid modeldata database type: " + modeldataDbType)
640+
}
641+
}
642+
633643
/** Obtains an ItemRecScores object for training model data with configured backend type. */
634644
def getModeldataTrainingItemRecScores(): modeldata.ItemRecScores = {
635645
modeldataDbType match {
@@ -639,4 +649,14 @@ class Config {
639649
case _ => throw new RuntimeException("Invalid modeldata database type: " + modeldataTrainingDbType)
640650
}
641651
}
652+
653+
/** Obtains an ItemSimScores object for training model data with configured backend type. */
654+
def getModeldataTrainingItemSimScores(): modeldata.ItemSimScores = {
655+
modeldataDbType match {
656+
case "mongodb" => {
657+
new modeldata.mongodb.MongoItemSimScores(modeldataTrainingMongoDb.get)
658+
}
659+
case _ => throw new RuntimeException("Invalid modeldata database type: " + modeldataTrainingDbType)
660+
}
661+
}
642662
}

commons/src/main/scala/io/prediction/commons/appdata/Items.scala

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@ trait Items {
3838
/** Get an item by ID. */
3939
def get(appid: Int, id: String): Option[Item]
4040

41+
/** Find all items by App ID. */
42+
def getByAppid(appid: Int): Iterator[Item]
43+
4144
/** Get items by IDs. */
4245
def getByIds(appid: Int, ids: Seq[String]): Seq[Item]
4346

commons/src/main/scala/io/prediction/commons/appdata/mongodb/MongoItems.scala

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ class MongoItems(db: MongoDB) extends Items {
3939
itemColl.findOne(MongoDBObject("_id" -> idWithAppid(appid, id))) map { dbObjToItem(_) }
4040
}
4141

42+
def getByAppid(appid: Int) = new MongoItemsIterator(itemColl.find(MongoDBObject("appid" -> appid)))
43+
4244
def getByIds(appid: Int, ids: Seq[String]) = {
4345
itemColl.find(MongoDBObject("_id" -> MongoDBObject("$in" -> ids.map(idWithAppid(appid, _))))).toList map { dbObjToItem(_) }
4446
}
@@ -90,4 +92,9 @@ class MongoItems(db: MongoDB) extends Items {
9092
attributes = Option(getAttributesFromDBObject(dbObj)).filter(!_.isEmpty)
9193
)
9294
}
95+
96+
class MongoItemsIterator(it: MongoCursor) extends Iterator[Item] {
97+
def next = dbObjToItem(it.next)
98+
def hasNext = it.hasNext
99+
}
93100
}

commons/src/main/scala/io/prediction/commons/modeldata/ItemRecScores.scala

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import io.prediction.commons.settings.{Algo, App, OfflineEval}
99
* @param iid Item ID.
1010
* @param score Recommendation score.
1111
* @param itypes Item types of the item recommended. Copied from the item when a batch mode algorithm is run.
12+
* @param appid App ID of this record.
1213
* @param algoid Algo ID of this record.
1314
* @param modelset Model data set.
1415
* @param id ItemRecScore ID (optional field used internally for sorting)
@@ -17,7 +18,7 @@ case class ItemRecScore(
1718
uid: String,
1819
iid: String,
1920
score: Double,
20-
itypes: List[String],
21+
itypes: Seq[String],
2122
appid: Int,
2223
algoid: Int,
2324
modelset: Boolean,
@@ -26,14 +27,14 @@ case class ItemRecScore(
2627

2728
/** Base trait for implementations that interact with itemrec scores in the backend data store. */
2829
trait ItemRecScores {
29-
/** Insert an ItemSimScore and return it with a real ID, if any (database vendor dependent). */
30+
/** Insert an ItemRecScore and return it with a real ID, if any (database vendor dependent). */
3031
def insert(itemRecScore: ItemRecScore): ItemRecScore
3132

32-
/** Get the top N ItemSimScore ranked by score in descending order.
33+
/** Get the top N ItemRecScore ranked by score in descending order.
3334
*
34-
* @param after Returns the next top N results after the provided ItemSimScore, if provided.
35+
* @param after Returns the next top N results after the provided ItemRecScore, if provided.
3536
*/
36-
def getTopN(uid: String, n: Int, itypes: Option[List[String]], after: Option[ItemRecScore])(implicit app: App, algo: Algo, offlineEval: Option[OfflineEval] = None): Iterator[ItemRecScore]
37+
def getTopN(uid: String, n: Int, itypes: Option[Seq[String]], after: Option[ItemRecScore])(implicit app: App, algo: Algo, offlineEval: Option[OfflineEval] = None): Iterator[ItemRecScore]
3738

3839
/** Delete by Algo ID. */
3940
def deleteByAlgoid(algoid: Int)

0 commit comments

Comments
 (0)