From f5dbeeac1b970497eeb011d9733f2e1455ec9573 Mon Sep 17 00:00:00 2001 From: baunsgaard Date: Mon, 26 Jun 2023 15:27:38 +0200 Subject: [PATCH 1/4] [MINOR] Add licenses to federated tutorial --- scripts/tutorials/federated/.gitignore | 20 ++++++++++++++++ scripts/tutorials/federated/README.md | 19 +++++++++++++++ .../dataGen/federatedMetaDataGenerator.py | 21 ++++++++++++++++ scripts/tutorials/federated/code/exp/CNN.dml | 20 ++++++++++++++++ scripts/tutorials/federated/code/exp/lm.dml | 21 ++++++++++++++++ .../tutorials/federated/code/exp/mLogReg.dml | 21 ++++++++++++++++ scripts/tutorials/federated/code/exp/sum.dml | 21 ++++++++++++++++ .../federated/code/exp/sumRepeat.dml | 21 ++++++++++++++++ scripts/tutorials/federated/conf/def.xml | 18 ++++++++++++++ .../federated/conf/log4j-debug.properties | 21 ++++++++++++++++ .../federated/conf/log4j-info.properties | 22 +++++++++++++++++ .../federated/conf/log4j-off.properties | 21 ++++++++++++++++ scripts/tutorials/federated/conf/ssl.xml | 18 ++++++++++++++ scripts/tutorials/federated/install.sh | 23 +++++++++++++++++- scripts/tutorials/federated/parameters.sh | 23 +++++++++++++++++- scripts/tutorials/federated/portforward.sh | 23 +++++++++++++++++- scripts/tutorials/federated/run.sh | 22 ++++++++++++++++- .../federated/scripts/startMonitoring.sh | 24 ++++++++++++++++--- .../federated/scripts/startWorker.sh | 22 ++++++++++++++++- .../federated/scripts/stopMonitoring.sh | 22 ++++++++++++++++- .../tutorials/federated/scripts/stopWorker.sh | 22 ++++++++++++++++- scripts/tutorials/federated/setup.sh | 23 ++++++++++++++++-- .../tutorials/federated/startAllWorkers.sh | 22 ++++++++++++++++- scripts/tutorials/federated/stopAllWorkers.sh | 22 ++++++++++++++++- scripts/tutorials/federated/sync.sh | 22 ++++++++++++++++- 25 files changed, 519 insertions(+), 15 deletions(-) diff --git a/scripts/tutorials/federated/.gitignore b/scripts/tutorials/federated/.gitignore index a75af7fdd0b..2f4d395e114 100644 --- 
a/scripts/tutorials/federated/.gitignore +++ b/scripts/tutorials/federated/.gitignore @@ -1,3 +1,23 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- data/ mnist/ diff --git a/scripts/tutorials/federated/README.md b/scripts/tutorials/federated/README.md index ec2c7cf4ad9..26acd011e5a 100644 --- a/scripts/tutorials/federated/README.md +++ b/scripts/tutorials/federated/README.md @@ -1,3 +1,22 @@ + + # Federated SystemDS tutorial This tutorial is dedicated to a distributed example of systemds federated. diff --git a/scripts/tutorials/federated/code/dataGen/federatedMetaDataGenerator.py b/scripts/tutorials/federated/code/dataGen/federatedMetaDataGenerator.py index 67af88f62f6..049dc0ca733 100644 --- a/scripts/tutorials/federated/code/dataGen/federatedMetaDataGenerator.py +++ b/scripts/tutorials/federated/code/dataGen/federatedMetaDataGenerator.py @@ -1,3 +1,24 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + import argparse import io import json diff --git a/scripts/tutorials/federated/code/exp/CNN.dml b/scripts/tutorials/federated/code/exp/CNN.dml index c4afd11ae5c..ec033210a0a 100644 --- a/scripts/tutorials/federated/code/exp/CNN.dml +++ b/scripts/tutorials/federated/code/exp/CNN.dml @@ -1,3 +1,23 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +#------------------------------------------------------------- X = read($1) / 255 Y = read($2) diff --git a/scripts/tutorials/federated/code/exp/lm.dml b/scripts/tutorials/federated/code/exp/lm.dml index 120e149d86a..050fced4fff 100644 --- a/scripts/tutorials/federated/code/exp/lm.dml +++ b/scripts/tutorials/federated/code/exp/lm.dml @@ -1,3 +1,24 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + X = read($1) Y = read($2) diff --git a/scripts/tutorials/federated/code/exp/mLogReg.dml b/scripts/tutorials/federated/code/exp/mLogReg.dml index 003880fb9e6..165c85ba913 100644 --- a/scripts/tutorials/federated/code/exp/mLogReg.dml +++ b/scripts/tutorials/federated/code/exp/mLogReg.dml @@ -1,3 +1,24 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + X = read($1) Y = read($2) + 1 Xt = read($3) diff --git a/scripts/tutorials/federated/code/exp/sum.dml b/scripts/tutorials/federated/code/exp/sum.dml index 64558fe1f5e..f13084bdba8 100644 --- a/scripts/tutorials/federated/code/exp/sum.dml +++ b/scripts/tutorials/federated/code/exp/sum.dml @@ -1,3 +1,24 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +#------------------------------------------------------------- + X = read($1) X_s = sum(X) diff --git a/scripts/tutorials/federated/code/exp/sumRepeat.dml b/scripts/tutorials/federated/code/exp/sumRepeat.dml index b14ddd3621e..7cbfd7e8e94 100644 --- a/scripts/tutorials/federated/code/exp/sumRepeat.dml +++ b/scripts/tutorials/federated/code/exp/sumRepeat.dml @@ -1,3 +1,24 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +#------------------------------------------------------------- + X = read($1) for(x in 1:$2){ diff --git a/scripts/tutorials/federated/conf/def.xml b/scripts/tutorials/federated/conf/def.xml index 60622e819ab..1b01bf5d32a 100644 --- a/scripts/tutorials/federated/conf/def.xml +++ b/scripts/tutorials/federated/conf/def.xml @@ -1,2 +1,20 @@ + \ No newline at end of file diff --git a/scripts/tutorials/federated/conf/log4j-debug.properties b/scripts/tutorials/federated/conf/log4j-debug.properties index 8579f455c0c..aaed84f0732 100644 --- a/scripts/tutorials/federated/conf/log4j-debug.properties +++ b/scripts/tutorials/federated/conf/log4j-debug.properties @@ -1,3 +1,24 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +#------------------------------------------------------------- + log4j.rootLogger=ALL, console log4j.logger.io.netty=ERROR diff --git a/scripts/tutorials/federated/conf/log4j-info.properties b/scripts/tutorials/federated/conf/log4j-info.properties index 4aabf85cf3f..87c52693a49 100644 --- a/scripts/tutorials/federated/conf/log4j-info.properties +++ b/scripts/tutorials/federated/conf/log4j-info.properties @@ -1,3 +1,25 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + + log4j.rootLogger=ALL, console log4j.logger.io.netty=ERROR diff --git a/scripts/tutorials/federated/conf/log4j-off.properties b/scripts/tutorials/federated/conf/log4j-off.properties index 5c3a3671d59..63b9f2fa38b 100644 --- a/scripts/tutorials/federated/conf/log4j-off.properties +++ b/scripts/tutorials/federated/conf/log4j-off.properties @@ -1,3 +1,24 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + log4j.rootLogger=ALL, console log4j.logger.io.netty=ERROR diff --git a/scripts/tutorials/federated/conf/ssl.xml b/scripts/tutorials/federated/conf/ssl.xml index 2acd9add7ac..f375ba33fed 100644 --- a/scripts/tutorials/federated/conf/ssl.xml +++ b/scripts/tutorials/federated/conf/ssl.xml @@ -1,3 +1,21 @@ + true \ No newline at end of file diff --git a/scripts/tutorials/federated/install.sh b/scripts/tutorials/federated/install.sh index 3cae2741f10..9ddd94d1f9e 100755 --- a/scripts/tutorials/federated/install.sh +++ b/scripts/tutorials/federated/install.sh @@ -1,4 +1,25 @@ -#/bin/bash +#!/usr/bin/env bash +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + source parameters.sh if [[ ! -d "python_venv" ]]; then diff --git a/scripts/tutorials/federated/parameters.sh b/scripts/tutorials/federated/parameters.sh index 18e6b56e7ac..1d67f9a619a 100755 --- a/scripts/tutorials/federated/parameters.sh +++ b/scripts/tutorials/federated/parameters.sh @@ -1,4 +1,24 @@ -#!/bin/bash +#!/usr/bin/env bash +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +#------------------------------------------------------------- # Memory allowed to be used by each worker and coordinator export SYSTEMDS_STANDALONE_OPTS="-Xmx4g -Xms4g -Xmn400m" @@ -8,6 +28,7 @@ export SYSTEMDS_ROOT="$HOME/github/systemds" export PATH="$SYSTEMDS_ROOT/bin:$PATH" ## Logging variables: +# Set logging properties for the system export LOG4JPROP='conf/log4j-off.properties' # export LOG4JPROP='conf/log4j-debug.properties' # export LOG4JPROP='conf/log4j-info.properties' diff --git a/scripts/tutorials/federated/portforward.sh b/scripts/tutorials/federated/portforward.sh index 9829a3c1887..5418c487df2 100755 --- a/scripts/tutorials/federated/portforward.sh +++ b/scripts/tutorials/federated/portforward.sh @@ -1,4 +1,25 @@ -#/bin/bash +#!/usr/bin/env bash +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +#------------------------------------------------------------- + source parameters.sh for index in ${!address[*]}; do diff --git a/scripts/tutorials/federated/run.sh b/scripts/tutorials/federated/run.sh index e9138d0d01b..d465469f87c 100755 --- a/scripts/tutorials/federated/run.sh +++ b/scripts/tutorials/federated/run.sh @@ -1,4 +1,24 @@ -#/bin/bash +#!/usr/bin/env bash +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- source parameters.sh diff --git a/scripts/tutorials/federated/scripts/startMonitoring.sh b/scripts/tutorials/federated/scripts/startMonitoring.sh index aaa95ba84e2..e346a72b263 100755 --- a/scripts/tutorials/federated/scripts/startMonitoring.sh +++ b/scripts/tutorials/federated/scripts/startMonitoring.sh @@ -1,4 +1,24 @@ -#/bin/bash +#!/usr/bin/env bash +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- source parameters.sh @@ -25,5 +45,3 @@ echo $! > "tmp/monitoring/UIProcessID" echo "Starting UI" sleep 10 - - diff --git a/scripts/tutorials/federated/scripts/startWorker.sh b/scripts/tutorials/federated/scripts/startWorker.sh index e9e70f29b6c..2b476111b2a 100755 --- a/scripts/tutorials/federated/scripts/startWorker.sh +++ b/scripts/tutorials/federated/scripts/startWorker.sh @@ -1,4 +1,24 @@ -#/bin/bash +#!/usr/bin/env bash +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +#------------------------------------------------------------- ## This script is to be run on the federated site diff --git a/scripts/tutorials/federated/scripts/stopMonitoring.sh b/scripts/tutorials/federated/scripts/stopMonitoring.sh index 0c3a37218a6..59f35c72b91 100755 --- a/scripts/tutorials/federated/scripts/stopMonitoring.sh +++ b/scripts/tutorials/federated/scripts/stopMonitoring.sh @@ -1,4 +1,24 @@ -#/bin/bash +#!/usr/bin/env bash +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- if [[ -d "tmp/monitoring" ]]; then echo "Stopping Monitoring " diff --git a/scripts/tutorials/federated/scripts/stopWorker.sh b/scripts/tutorials/federated/scripts/stopWorker.sh index d5d122a5035..87f86828ed0 100755 --- a/scripts/tutorials/federated/scripts/stopWorker.sh +++ b/scripts/tutorials/federated/scripts/stopWorker.sh @@ -1,4 +1,24 @@ -#/bin/bash +#!/usr/bin/env bash +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- ## This script is to be run on the federated site diff --git a/scripts/tutorials/federated/setup.sh b/scripts/tutorials/federated/setup.sh index 6368596813e..a3846080f7b 100755 --- a/scripts/tutorials/federated/setup.sh +++ b/scripts/tutorials/federated/setup.sh @@ -1,5 +1,24 @@ -#/bin/bash - +#!/usr/bin/env bash +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +#------------------------------------------------------------- source parameters.sh export LOG4JPROP='conf/log4j-off.properties' diff --git a/scripts/tutorials/federated/startAllWorkers.sh b/scripts/tutorials/federated/startAllWorkers.sh index 82e368654b4..02176d0da2b 100755 --- a/scripts/tutorials/federated/startAllWorkers.sh +++ b/scripts/tutorials/federated/startAllWorkers.sh @@ -1,4 +1,24 @@ -#/bin/bash +#!/usr/bin/env bash +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- source parameters.sh diff --git a/scripts/tutorials/federated/stopAllWorkers.sh b/scripts/tutorials/federated/stopAllWorkers.sh index 2470e0838bc..d9384eb130f 100755 --- a/scripts/tutorials/federated/stopAllWorkers.sh +++ b/scripts/tutorials/federated/stopAllWorkers.sh @@ -1,4 +1,24 @@ -#!/bin/bash +#!/usr/bin/env bash +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- source parameters.sh diff --git a/scripts/tutorials/federated/sync.sh b/scripts/tutorials/federated/sync.sh index c37d80c7efe..51571b181c7 100755 --- a/scripts/tutorials/federated/sync.sh +++ b/scripts/tutorials/federated/sync.sh @@ -1,4 +1,24 @@ -#/bin/bash +#!/usr/bin/env bash +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +#------------------------------------------------------------- source parameters.sh From 56b4e8dddb6407b2662ed4f0ed4aea0e9d55aae0 Mon Sep 17 00:00:00 2001 From: baunsgaard Date: Mon, 26 Jun 2023 15:28:33 +0200 Subject: [PATCH 2/4] [MINOR] Do not RAT ignore tutorials --- pom.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/pom.xml b/pom.xml index 5bc2c0fcb72..e84064080be 100644 --- a/pom.xml +++ b/pom.xml @@ -538,7 +538,6 @@ scripts/perftest/results/** scripts/perftest/temp/** - scripts/tutorials/** scripts/perftest/logs/** scripts/monitoring/node_modules/** .repository/ From 0aa69fd8ec38aa142ab916430ad5ed5ae613459f Mon Sep 17 00:00:00 2001 From: baunsgaard Date: Mon, 26 Jun 2023 16:54:51 +0200 Subject: [PATCH 3/4] [Minor] Federated Tutorial text update --- scripts/tutorials/federated/README.md | 82 ++++++++++++++----- .../federated/code/dataGen/slice.dml | 22 ++++- .../tutorials/federated/code/exp/mLogReg.dml | 2 +- scripts/tutorials/federated/parameters.sh | 10 +++ scripts/tutorials/federated/run.sh | 18 ++-- 5 files changed, 102 insertions(+), 32 deletions(-) diff --git a/scripts/tutorials/federated/README.md b/scripts/tutorials/federated/README.md index 26acd011e5a..7bd7e08f7d4 100644 --- a/scripts/tutorials/federated/README.md +++ b/scripts/tutorials/federated/README.md @@ -28,7 +28,7 @@ Before you begin look trough the parameters.sh file, and change the variables to The default parameters are set to execute a four worker setup on localhost. If you have access to other machines simply change the address list to the remote locations, either using IP addresses, or aliases. -Also note the memory settings, and set these appropriately +Also note the memory settings, and set these appropriately such that you do not run out of memory for local execution of 5 parallel java processes. 
Before going further it is expected that you have setup the default install of SystemDS described in: @@ -72,25 +72,48 @@ Next we download and split the dataset into partitions that the different federa The expected output is: ```txt -Me:~/github/federatedTutorial$ ./setup.sh -Generating data/mnist_features_2_1.data +Me:~/.../scripts/tutorials/federated$ ./setup.sh +Generating data/mnist_features_4_1.data +Generating data/mnist_labels_4_1.data +Generating data/mnist_labels_hot_4_1.data +Generating data/mnist_test_features_4_1.data +Generating data/mnist_test_labels_4_1.data +Generating data/mnist_test_labels_hot_4_1.data SystemDS Statistics: -Total execution time: 0.672 sec. +Total execution time: 0.892 sec. -Generating data/mnist_labels_2_1.data SystemDS Statistics: -Total execution time: 0.109 sec. +Total execution time: 0.943 sec. + +SystemDS Statistics: +Total execution time: 0.732 sec. + +SystemDS Statistics: +Total execution time: 0.992 sec. + +SystemDS Statistics: +Total execution time: 1.227 sec. + +SystemDS Statistics: +Total execution time: 1.274 sec. 
``` and the data folder should contain the following: ```txt -Me:~/github/federatedTutorial$ ls data -fed_mnist_features_1.json fed_mnist_labels_1.json.mtd mnist_features_2_1.data mnist_features.data.mtd mnist_labels_2_2.data mnist_test_features.data.mtd -fed_mnist_features_1.json.mtd fed_mnist_labels_2.json mnist_features_2_1.data.mtd mnist_labels_1_1.data mnist_labels_2_2.data.mtd mnist_test_labels.data -fed_mnist_features_2.json fed_mnist_labels_2.json.mtd mnist_features_2_2.data mnist_labels_1_1.data.mtd mnist_labels.data mnist_test_labels.data.mtd -fed_mnist_features_2.json.mtd mnist_features_1_1.data mnist_features_2_2.data.mtd mnist_labels_2_1.data mnist_labels.data.mtd -fed_mnist_labels_1.json mnist_features_1_1.data.mtd mnist_features.data mnist_labels_2_1.data.mtd mnist_test_features.data +Me:~/.../scripts/tutorials/federated$ ls data +fed_mnist_features_4.json mnist_features_4_1.data mnist_labels_4_2.data mnist_labels_hot_4_3.data mnist_test_features_4_4.data mnist_test_labels.data +fed_mnist_features_4.json.mtd mnist_features_4_1.data.mtd mnist_labels_4_2.data.mtd mnist_labels_hot_4_3.data.mtd mnist_test_features_4_4.data.mtd mnist_test_labels.data.mtd +fed_mnist_labels_4.json mnist_features_4_2.data mnist_labels_4_3.data mnist_labels_hot_4_4.data mnist_test_features.data mnist_test_labels_hot_4_1.data +fed_mnist_labels_4.json.mtd mnist_features_4_2.data.mtd mnist_labels_4_3.data.mtd mnist_labels_hot_4_4.data.mtd mnist_test_features.data.mtd mnist_test_labels_hot_4_1.data.mtd +fed_mnist_labels_hot_4.json mnist_features_4_3.data mnist_labels_4_4.data mnist_labels_hot.data mnist_test_labels_4_1.data mnist_test_labels_hot_4_2.data +fed_mnist_labels_hot_4.json.mtd mnist_features_4_3.data.mtd mnist_labels_4_4.data.mtd mnist_labels_hot.data.mtd mnist_test_labels_4_1.data.mtd mnist_test_labels_hot_4_2.data.mtd +fed_mnist_test_features_4.json mnist_features_4_4.data mnist_labels.data mnist_test_features_4_1.data mnist_test_labels_4_2.data 
mnist_test_labels_hot_4_3.data +fed_mnist_test_features_4.json.mtd mnist_features_4_4.data.mtd mnist_labels.data.mtd mnist_test_features_4_1.data.mtd mnist_test_labels_4_2.data.mtd mnist_test_labels_hot_4_3.data.mtd +fed_mnist_test_labels_4.json mnist_features.data mnist_labels_hot_4_1.data mnist_test_features_4_2.data mnist_test_labels_4_3.data mnist_test_labels_hot_4_4.data +fed_mnist_test_labels_4.json.mtd mnist_features.data.mtd mnist_labels_hot_4_1.data.mtd mnist_test_features_4_2.data.mtd mnist_test_labels_4_3.data.mtd mnist_test_labels_hot_4_4.data.mtd +fed_mnist_test_labels_hot_4.json mnist_labels_4_1.data mnist_labels_hot_4_2.data mnist_test_features_4_3.data mnist_test_labels_4_4.data mnist_test_labels_hot.data +fed_mnist_test_labels_hot_4.json.mtd mnist_labels_4_1.data.mtd mnist_labels_hot_4_2.data.mtd mnist_test_features_4_3.data.mtd mnist_test_labels_4_4.data.mtd mnist_test_labels_hot.data.mtd ``` ## Step 4: Start Workers @@ -104,19 +127,38 @@ Now everything is setup, simply start the workers using the startAllWorkers scri output: ```txt -Me:~/github/federatedTutorial$ ./startAllWorkers.sh +Me:~/.../scripts/tutorials/federated$ ./startAllWorkers.sh Starting Workers. 
+Starting monitoring +/home/baunsgaard/github/systemds +Starting worker XPS-15-7590 8003 def +Starting UI Starting worker XPS-15-7590 8002 def Starting worker XPS-15-7590 8001 def +Starting worker XPS-15-7590 8004 def + % Total % Received % Xferd Average Speed Time Time Time Current + Dload Upload Total Spent Left Speed +100 130 100 79 100 51 87 56 --:--:-- --:--:-- --:--:-- 144 + % Total % Received % Xferd Average Speed Time Time Time Current + Dload Upload Total Spent Left Speed +100 130 100 79 100 51 7380 4764 --:--:-- --:--:-- --:--:-- 13000 + % Total % Received % Xferd Average Speed Time Time Time Current + Dload Upload Total Spent Left Speed +100 130 100 79 100 51 8005 5168 --:--:-- --:--:-- --:--:-- 14444 + % Total % Received % Xferd Average Speed Time Time Time Current + Dload Upload Total Spent Left Speed +100 130 100 79 100 51 6462 4171 --:--:-- --:--:-- --:--:-- 10833 +A Monitoring tool is started at http://localhost:4200 ``` -The workers will start and some temporary files will be created containing the PID for the worker, to enable specific termination of the worker after experimentation is done. Note that you can run the algorithm multiple times with the same workers. +The workers will start and some temporary files will be created containing the PID for the worker, to enable specific termination of the worker after experimentation is done. +Note that you can run the algorithm multiple times with the same workers. 
```txt -Me:~/github/federatedTutorial$ ls tmp/worker/ -XPS-15-7590-8001 XPS-15-7590-8002 -Me:~/github/federatedTutorial$ cat tmp/worker/XPS-15-7590-8001 -13850 +Me:~/.../scripts/tutorials/federated$ ls tmp/worker +XPS-15-7590-8001 XPS-15-7590-8002 XPS-15-7590-8003 XPS-15-7590-8004 +Me:~/.../scripts/tutorials/federated$ cat tmp/worker/XPS-15-7590-8001 +32528 ``` Also worth noting is that all the output from the federated worker is concatenated to: results/fed/workerlog/ @@ -139,7 +181,7 @@ Note these process will just continue running in the background so have to be ma ## Step 6: run algorithms -This tutorial is using different scripts depending on what is outcommented in the run.sh. +This tutorial is using different scripts depending on what is commented out in the run.sh. Please go into this file and enable which specific script you want to run. @@ -238,7 +280,7 @@ Me:~/.../scripts/tutorials/federated$ ./stopAllWorkers.sh Stopping workers XPS-15-7590 Stopping Monitoring STOP NPM manually!! Process ID: -62870q +62870 ``` As specified by the output npm is still running and have to be manually stopped. diff --git a/scripts/tutorials/federated/code/dataGen/slice.dml b/scripts/tutorials/federated/code/dataGen/slice.dml index 8fdc7bceace..d59557fbd5a 100644 --- a/scripts/tutorials/federated/code/dataGen/slice.dml +++ b/scripts/tutorials/federated/code/dataGen/slice.dml @@ -1,5 +1,23 @@ - - +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- X = read( "data/" + $1 + ".data") parts = $2 diff --git a/scripts/tutorials/federated/code/exp/mLogReg.dml b/scripts/tutorials/federated/code/exp/mLogReg.dml index 165c85ba913..97b8bbf7559 100644 --- a/scripts/tutorials/federated/code/exp/mLogReg.dml +++ b/scripts/tutorials/federated/code/exp/mLogReg.dml @@ -24,7 +24,7 @@ Y = read($2) + 1 Xt = read($3) Yt = read($4) + 1 -beta = multiLogReg(X=X, Y=Y, verbose=$5, maxi=5) +beta = multiLogReg(X=X, Y=Y, verbose=$5, maxi=100) [m, pred, acc] = multiLogRegPredict(X=Xt, B=beta, Y=Yt) diff --git a/scripts/tutorials/federated/parameters.sh b/scripts/tutorials/federated/parameters.sh index 1d67f9a619a..77abb3de3a9 100755 --- a/scripts/tutorials/federated/parameters.sh +++ b/scripts/tutorials/federated/parameters.sh @@ -29,15 +29,23 @@ export PATH="$SYSTEMDS_ROOT/bin:$PATH" ## Logging variables: # Set logging properties for the system +# Off disables the logging export LOG4JPROP='conf/log4j-off.properties' # export LOG4JPROP='conf/log4j-debug.properties' # export LOG4JPROP='conf/log4j-info.properties' + +# Set the system to start up in quiet mode, to not print excessively on every execution. export SYSDS_QUIET=1 + +# Set the addresses of your federated workers.
+ # address=("tango" "delta" "india" "echo") # address=("tango" "delta") address=("localhost" "localhost" "localhost" "localhost") + +# We assume for the scripts to work that each worker has a unique port ports=("8001" "8002" "8003" "8004") numWorkers=${#address[@]} @@ -46,6 +54,8 @@ numWorkers=${#address[@]} remoteDir="github/federatedTutorial-v3/" # configuration: +# This defines the configuration file to be used for the execution. +# Change this to enable different settings of SystemDS conf="def" # conf="ssl" diff --git a/scripts/tutorials/federated/run.sh b/scripts/tutorials/federated/run.sh index d465469f87c..402dcab9b7f 100755 --- a/scripts/tutorials/federated/run.sh +++ b/scripts/tutorials/federated/run.sh @@ -39,11 +39,11 @@ source parameters.sh # -fedMonitoringAddress "http://localhost:8080" # Execute a Multi Log Regression model, do prediction and print confusion matrix -# systemds code/exp/mLogReg.dml \ -# -config conf/$conf.xml \ -# -stats 30 \ -# -args $x $y $xt $yt TRUE \ -# -fedMonitoringAddress "http://localhost:8080" +systemds code/exp/mLogReg.dml \ + -config conf/$conf.xml \ + -stats 30 \ + -args $x $y $xt $yt TRUE \ + -fedMonitoringAddress "http://localhost:8080" # Execute locally to compare # systemds code/exp/mLogReg.dml \ @@ -51,10 +51,10 @@ source parameters.sh # -stats 100 \ # -args $x_loc $y_loc $xt_loc $yt_loc TRUE -systemds code/exp/CNN.dml \ - -stats \ - -args $x $y_hot $xt $yt_hot \ - -fedMonitoringAddress "http://localhost:8080" +# systemds code/exp/CNN.dml \ +# -stats \ +# -args $x $y_hot $xt $yt_hot \ +# -fedMonitoringAddress "http://localhost:8080" # systemds code/exp/sumRepeat.dml \ From b4bc2ab1f7ace7b939da112d0fe3f1f916715083 Mon Sep 17 00:00:00 2001 From: baunsgaard Date: Mon, 26 Jun 2023 16:55:24 +0200 Subject: [PATCH 4/4] [MINOR] Update Python generated builtins --- .../systemds/operator/algorithm/__init__.py | 20 ++++++ .../algorithm/builtin/decisionTree.py | 6 ++ .../algorithm/builtin/flattenQuantile.py | 48 ++++++++++++++
.../operator/algorithm/builtin/mae.py | 59 +++++++++++++++++ .../operator/algorithm/builtin/mape.py | 65 +++++++++++++++++++ .../operator/algorithm/builtin/mse.py | 59 +++++++++++++++++ .../operator/algorithm/builtin/msmape.py | 62 ++++++++++++++++++ .../operator/algorithm/builtin/nrmse.py | 59 +++++++++++++++++ .../operator/algorithm/builtin/psnr.py | 49 ++++++++++++++ .../operator/algorithm/builtin/rmse.py | 59 +++++++++++++++++ .../operator/algorithm/builtin/skewness.py | 45 +++++++++++++ .../operator/algorithm/builtin/smape.py | 65 +++++++++++++++++++ 12 files changed, 596 insertions(+) create mode 100644 src/main/python/systemds/operator/algorithm/builtin/flattenQuantile.py create mode 100644 src/main/python/systemds/operator/algorithm/builtin/mae.py create mode 100644 src/main/python/systemds/operator/algorithm/builtin/mape.py create mode 100644 src/main/python/systemds/operator/algorithm/builtin/mse.py create mode 100644 src/main/python/systemds/operator/algorithm/builtin/msmape.py create mode 100644 src/main/python/systemds/operator/algorithm/builtin/nrmse.py create mode 100644 src/main/python/systemds/operator/algorithm/builtin/psnr.py create mode 100644 src/main/python/systemds/operator/algorithm/builtin/rmse.py create mode 100644 src/main/python/systemds/operator/algorithm/builtin/skewness.py create mode 100644 src/main/python/systemds/operator/algorithm/builtin/smape.py diff --git a/src/main/python/systemds/operator/algorithm/__init__.py b/src/main/python/systemds/operator/algorithm/__init__.py index 769ca66229a..d0bc373f886 100644 --- a/src/main/python/systemds/operator/algorithm/__init__.py +++ b/src/main/python/systemds/operator/algorithm/__init__.py @@ -61,6 +61,7 @@ from .builtin.fit_pipeline import fit_pipeline from .builtin.fixInvalidLengths import fixInvalidLengths from .builtin.fixInvalidLengthsApply import fixInvalidLengthsApply +from .builtin.flattenQuantile import flattenQuantile from .builtin.frameSort import frameSort from 
.builtin.frequencyEncode import frequencyEncode from .builtin.frequencyEncodeApply import frequencyEncodeApply @@ -113,11 +114,15 @@ from .builtin.lmPredict import lmPredict from .builtin.lmPredictStats import lmPredictStats from .builtin.logSumExp import logSumExp +from .builtin.mae import mae +from .builtin.mape import mape from .builtin.matrixProfile import matrixProfile from .builtin.mcc import mcc from .builtin.mdedup import mdedup from .builtin.mice import mice from .builtin.miceApply import miceApply +from .builtin.mse import mse +from .builtin.msmape import msmape from .builtin.msvm import msvm from .builtin.msvmPredict import msvmPredict from .builtin.multiLogReg import multiLogReg @@ -127,6 +132,7 @@ from .builtin.naiveBayesPredict import naiveBayesPredict from .builtin.normalize import normalize from .builtin.normalizeApply import normalizeApply +from .builtin.nrmse import nrmse from .builtin.outlier import outlier from .builtin.outlierByArima import outlierByArima from .builtin.outlierByIQR import outlierByIQR @@ -138,8 +144,10 @@ from .builtin.pcaTransform import pcaTransform from .builtin.pnmf import pnmf from .builtin.ppca import ppca +from .builtin.psnr import psnr from .builtin.randomForest import randomForest from .builtin.randomForestPredict import randomForestPredict +from .builtin.rmse import rmse from .builtin.scale import scale from .builtin.scaleApply import scaleApply from .builtin.scaleMinMax import scaleMinMax @@ -149,7 +157,9 @@ from .builtin.sherlockPredict import sherlockPredict from .builtin.shortestPath import shortestPath from .builtin.sigmoid import sigmoid +from .builtin.skewness import skewness from .builtin.slicefinder import slicefinder +from .builtin.smape import smape from .builtin.smote import smote from .builtin.softmax import softmax from .builtin.split import split @@ -215,6 +225,7 @@ 'fit_pipeline', 'fixInvalidLengths', 'fixInvalidLengthsApply', + 'flattenQuantile', 'frameSort', 'frequencyEncode', 'frequencyEncodeApply', 
@@ -267,11 +278,15 @@ 'lmPredict', 'lmPredictStats', 'logSumExp', + 'mae', + 'mape', 'matrixProfile', 'mcc', 'mdedup', 'mice', 'miceApply', + 'mse', + 'msmape', 'msvm', 'msvmPredict', 'multiLogReg', @@ -281,6 +296,7 @@ 'naiveBayesPredict', 'normalize', 'normalizeApply', + 'nrmse', 'outlier', 'outlierByArima', 'outlierByIQR', @@ -292,8 +308,10 @@ 'pcaTransform', 'pnmf', 'ppca', + 'psnr', 'randomForest', 'randomForestPredict', + 'rmse', 'scale', 'scaleApply', 'scaleMinMax', @@ -303,7 +321,9 @@ 'sherlockPredict', 'shortestPath', 'sigmoid', + 'skewness', 'slicefinder', + 'smape', 'smote', 'softmax', 'split', diff --git a/src/main/python/systemds/operator/algorithm/builtin/decisionTree.py b/src/main/python/systemds/operator/algorithm/builtin/decisionTree.py index 38ab517a8a5..1deb225ce19 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/decisionTree.py +++ b/src/main/python/systemds/operator/algorithm/builtin/decisionTree.py @@ -70,6 +70,12 @@ def decisionTree(X: Matrix, candidates at tree nodes: m = ceil(num_features^max_features) :param max_values: Parameter controlling the number of values per feature used as split candidates: nb = ceil(num_values^max_values) + :param max_dataratio: Parameter in [0,1] controlling when to materialize data + subsets of X and y on node splits. When set to 0, we always + scan the original X and y, which has the benefit of avoiding + the allocation and maintenance of data for all active nodes. + When set to 0.01 we rematerialize whenever the sub-tree data + would be less than 1% of the last materialized parent data size.
:param impurity: Impurity measure: entropy, gini (default), rss (regression) :param seed: Fixed seed for randomization of samples and split candidates :param verbose: Flag indicating verbose debug output diff --git a/src/main/python/systemds/operator/algorithm/builtin/flattenQuantile.py b/src/main/python/systemds/operator/algorithm/builtin/flattenQuantile.py new file mode 100644 index 00000000000..0d0988c8240 --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/flattenQuantile.py @@ -0,0 +1,48 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/flattenQuantile.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def flattenQuantile(X: Matrix, + P: Matrix): + """ + Returns the quantiles requested, but treating the input matrix X as a flattened matrix + to return quantiles of all cells as if it was a continuous allocation. + + + + :param X: Matrix with values to extract quantiles from. + :param P: Quantiles to extract as well if empty matrix not calculated + :return: Quantiles calculated + """ + + params_dict = {'X': X, 'P': P} + return Matrix(X.sds_context, + 'flattenQuantile', + named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/mae.py b/src/main/python/systemds/operator/algorithm/builtin/mae.py new file mode 100644 index 00000000000..001a9250235 --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/mae.py @@ -0,0 +1,59 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/mae.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def mae(X: Matrix, + Y: Matrix, + **kwargs: Dict[str, VALID_INPUT_TYPES]): + """ + Returns the mean absolute error between the two inputs + + + + :param X: First Matrix to compare + :param Y: Second Matrix to compare + :param P: Quantiles to extract as well if empty matrix not calculated + :return: Mean absolute error + :return: Quantiles calculated + """ + + params_dict = {'X': X, 'Y': Y} + params_dict.update(kwargs) + + vX_0 = Matrix(X.sds_context, '') + vX_1 = Matrix(X.sds_context, '') + output_nodes = [vX_0, vX_1, ] + + op = MultiReturn(X.sds_context, 'mae', output_nodes, named_input_nodes=params_dict) + + vX_0._unnamed_input_nodes = [op] + vX_1._unnamed_input_nodes = [op] + + return op diff --git a/src/main/python/systemds/operator/algorithm/builtin/mape.py b/src/main/python/systemds/operator/algorithm/builtin/mape.py new file mode 100644 index 00000000000..b5276ac8b53 --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/mape.py @@ -0,0 +1,65 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/mape.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def mape(X: Matrix, + Y: Matrix, + **kwargs: Dict[str, VALID_INPUT_TYPES]): + """ + Returns the mean absolute percentage error between the two inputs + + + Monash Time Series Forecasting Archive + Rakshitha Godahewa, Christoph Bergmeir, Geoffrey I. Webb, Rob J.
Hyndman, + Pablo Montero-Manso + + + + + :param X: First Matrix to compare + :param Y: Second Matrix to compare + :param P: Quantiles to extract as well if empty matrix not calculated + :return: Mean absolute percentage error + :return: Quantiles calculated + """ + + params_dict = {'X': X, 'Y': Y} + params_dict.update(kwargs) + + vX_0 = Matrix(X.sds_context, '') + vX_1 = Matrix(X.sds_context, '') + output_nodes = [vX_0, vX_1, ] + + op = MultiReturn(X.sds_context, 'mape', output_nodes, named_input_nodes=params_dict) + + vX_0._unnamed_input_nodes = [op] + vX_1._unnamed_input_nodes = [op] + + return op diff --git a/src/main/python/systemds/operator/algorithm/builtin/mse.py b/src/main/python/systemds/operator/algorithm/builtin/mse.py new file mode 100644 index 00000000000..47c6fa49348 --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/mse.py @@ -0,0 +1,59 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.
+# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/mse.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def mse(X: Matrix, + Y: Matrix, + **kwargs: Dict[str, VALID_INPUT_TYPES]): + """ + Returns the mean square error between the two inputs + + + + :param X: First Matrix to compare + :param Y: Second Matrix to compare + :param P: Quantiles to extract as well if empty matrix not calculated + :return: Mean Square error + :return: Quantiles calculated + """ + + params_dict = {'X': X, 'Y': Y} + params_dict.update(kwargs) + + vX_0 = Matrix(X.sds_context, '') + vX_1 = Matrix(X.sds_context, '') + output_nodes = [vX_0, vX_1, ] + + op = MultiReturn(X.sds_context, 'mse', output_nodes, named_input_nodes=params_dict) + + vX_0._unnamed_input_nodes = [op] + vX_1._unnamed_input_nodes = [op] + + return op diff --git a/src/main/python/systemds/operator/algorithm/builtin/msmape.py b/src/main/python/systemds/operator/algorithm/builtin/msmape.py new file mode 100644 index 00000000000..7e567278328 --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/msmape.py @@ -0,0 +1,62 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/msmape.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def msmape(X: Matrix, + Y: Matrix, + **kwargs: Dict[str, VALID_INPUT_TYPES]): + """ + Returns the modified symmetric mean absolute percentage error between the two inputs + + + Monash Time Series Forecasting Archive + Rakshitha Godahewa, Christoph Bergmeir, Geoffrey I. Webb, Rob J.
Hyndman, + Pablo Montero-Manso + + + + :param X: First Matrix to compare + :param Y: Second Matrix to compare + :return: The modified symmetric mean absolute percentage error + """ + + params_dict = {'X': X, 'Y': Y} + params_dict.update(kwargs) + + vX_0 = Matrix(X.sds_context, '') + vX_1 = Matrix(X.sds_context, '') + output_nodes = [vX_0, vX_1, ] + + op = MultiReturn(X.sds_context, 'msmape', output_nodes, named_input_nodes=params_dict) + + vX_0._unnamed_input_nodes = [op] + vX_1._unnamed_input_nodes = [op] + + return op diff --git a/src/main/python/systemds/operator/algorithm/builtin/nrmse.py b/src/main/python/systemds/operator/algorithm/builtin/nrmse.py new file mode 100644 index 00000000000..4e89d0fe97d --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/nrmse.py @@ -0,0 +1,59 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.
+# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/nrmse.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def nrmse(X: Matrix, + Y: Matrix, + **kwargs: Dict[str, VALID_INPUT_TYPES]): + """ + Returns the normalized root mean square error between the two inputs + + + + :param X: First Matrix to compare + :param Y: Second Matrix to compare + :param P: Quantiles to extract as well if empty matrix not calculated + :return: The normalized root mean square error + :return: Quantiles calculated + """ + + params_dict = {'X': X, 'Y': Y} + params_dict.update(kwargs) + + vX_0 = Matrix(X.sds_context, '') + vX_1 = Matrix(X.sds_context, '') + output_nodes = [vX_0, vX_1, ] + + op = MultiReturn(X.sds_context, 'nrmse', output_nodes, named_input_nodes=params_dict) + + vX_0._unnamed_input_nodes = [op] + vX_1._unnamed_input_nodes = [op] + + return op diff --git a/src/main/python/systemds/operator/algorithm/builtin/psnr.py b/src/main/python/systemds/operator/algorithm/builtin/psnr.py new file mode 100644 index 00000000000..ee7b8149894 --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/psnr.py @@ -0,0 +1,49 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/psnr.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def psnr(X: Matrix, + Y: Matrix): + """ + Returns the peak signal to noise ratio + + https://en.wikipedia.org/wiki/Peak_signal-to-noise_ratio + + + + :param X: First Matrix to compare + :param Y: Second Matrix to compare + :return: The peak signal to noise ratio + """ + + params_dict = {'X': X, 'Y': Y} + return Matrix(X.sds_context, + 'psnr', + named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/rmse.py b/src/main/python/systemds/operator/algorithm/builtin/rmse.py new file mode 100644 index 00000000000..eb27b93f567 --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/rmse.py @@ -0,0 +1,59 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
def rmse(X: Matrix,
         Y: Matrix,
         **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Builds the operation node for the 'rmse' builtin, which yields the
    root mean square error between the two inputs.

    The optional parameter P is not part of the explicit signature; when
    supplied it is forwarded through the keyword arguments.

    :param X: First Matrix to compare
    :param Y: Second Matrix to compare
    :param P: Quantiles to extract as well if empty matrix not calculated
    :return: A multi-return operation node with two matrix outputs:
        the root mean square error, and the quantiles calculated
    """

    # X and Y are named parameters, so kwargs can never shadow them here.
    named_inputs = {'X': X, 'Y': Y, **kwargs}

    context = X.sds_context
    # One unnamed placeholder matrix per value returned by the builtin.
    outputs = [Matrix(context, '') for _ in range(2)]

    multi_op = MultiReturn(context, 'rmse', outputs, named_input_nodes=named_inputs)

    # Link every output back to the operation that produces it.
    for out_node in outputs:
        out_node._unnamed_input_nodes = [multi_op]

    return multi_op
def skewness(X: Matrix):
    """
    Builds the operation node for the 'skewness' builtin, which yields the
    skewness of the matrix input.

    :param X: The matrix input
    :return: The skewness of the input matrix
    """

    named_inputs = dict(X=X)
    result = Matrix(X.sds_context, 'skewness', named_input_nodes=named_inputs)
    return result
def smape(X: Matrix,
          Y: Matrix,
          **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Builds the operation node for the 'smape' builtin, which yields the
    symmetric mean absolute percentage error between the two inputs.

    References:

    Monash Time Series Forecasting Archive
    Rakshitha Godahewaa, Christoph Bergmeira, Geoffrey I. Webba, Rob J. Hyndmanb,
    Pablo Montero-Mansoc

    Another Look at Measures of Forecast Accuracy, R. J. Hyndman and A. B. Koehler, 2006.

    The optional parameter P is not part of the explicit signature; when
    supplied it is forwarded through the keyword arguments.

    :param X: First Matrix to compare
    :param Y: Second Matrix to compare
    :param P: Quantiles to extract as well if empty matrix not calculated
    :return: A multi-return operation node with two matrix outputs:
        the symmetric mean absolute percentage error, and the quantiles calculated
    """

    # X and Y are named parameters, so kwargs can never shadow them here.
    named_inputs = {'X': X, 'Y': Y, **kwargs}

    context = X.sds_context
    # One unnamed placeholder matrix per value returned by the builtin.
    outputs = [Matrix(context, '') for _ in range(2)]

    multi_op = MultiReturn(context, 'smape', outputs, named_input_nodes=named_inputs)

    # Link every output back to the operation that produces it.
    for out_node in outputs:
        out_node._unnamed_input_nodes = [multi_op]

    return multi_op