Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions conf/zeppelin-site.xml.template
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,47 @@
</property>
-->


<!-- Notebook storage layer using Hadoop file system api
<property>
<name>zeppelin.notebook.storage</name>
<value>org.apache.zeppelin.notebook.repo.HDFSNotebookRepo</value>
<description>notebook persistence layer implementation</description>
</property>

<property>
<name>zeppelin.hadoop.uri</name>
<value>hdfs://localhost:8020</value>
<description>The scheme of the URI determines a configuration property name, fs.scheme.class whose value names the FileSystem class.
e.g. `hdfs://localhost:8020`, or `file:///`
Copy link
Copy Markdown
Member

@felixcheung felixcheung Aug 18, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess this might be confused with webhdfs server url? I'm not sure if there is a better name for it than hadoop.uri?

</description>
</property>

<property>
<name>kerberos.principal</name>
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should this be prefix with zeppelin.hadoop? there might be other uses of kerberos keytab/principal?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

like the key refresh stuff for shell interpreter...

<value>zeppelin@EXAMPLE.COM</value>
<description>The principal name to load from the keytab</description>
</property>
<property>
<name>kerberos.keytab</name>
<value>/etc/security/keytabs/zeppelin.server.kerberos.keytab</value>
<description>The path to the keytab file</description>
</property>

<property>
<name>kerberos.refresh.interval</name>
<value>1d</value>
<description>(optional) The refresh interval for Kerberos ticket. The default value is 1d.</description>
</property>
<property>
<name>kinit.fail.threshold</name>
<value>5</value>
<description>(optional) How many times should kinit retry. The default value is 5.</description>
</property>


-->

<!-- For connecting your Zeppelin with ZeppelinHub -->
<!--
<property>
Expand Down
57 changes: 57 additions & 0 deletions docs/setup/storage/storage.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ There are few notebook storage systems available for a use out of the box:

* (default) use local file system and version it using local Git repository - `GitNotebookRepo`
* all notes are saved in the notebook folder in your local File System - `VFSNotebookRepo`
* all notes are saved in the notebook folder in HDFS - `HDFSNotebookRepo`
* storage using Amazon S3 service - `S3NotebookRepo`
* storage using Azure service - `AzureNotebookRepo`
* storage using MongoDB - `MongoNotebookRepo`
Expand All @@ -53,6 +54,62 @@ To enable versioning for all your local notebooks though a standard Git reposito

</br>

## Notebook Storage in HDFS repository <a name="HDFS"></a>

Notes may be stored in HDFS so that multiple Zeppelin instances can share the same notes. All Hadoop 2.x versions are supported. If you use `HDFSNotebookRepo`, then `zeppelin.notebook.dir` is the path on HDFS.

```
<property>
<name>zeppelin.notebook.storage</name>
<value>org.apache.zeppelin.notebook.repo.HDFSNotebookRepo</value>
<description>hdfs notebook persistence layer implementation</description>
</property>

<property>
<name>zeppelin.hadoop.uri</name>
<value>hdfs://localhost.novalocal:8020</value>
<description>The scheme of the URI determines a configuration property name, fs.scheme.class whose value names the FileSystem class.
e.g. `hdfs://localhost.novalocal:8020`, or `file:///`
</description>
</property>

<property>
<name>zeppelin.notebook.dir</name>
<value>notebook</value>
<description>path or URI for notebook persist</description>
</property>
```

</br>

**(optional) Configuration if Kerberos is enabled on an HDFS cluster**

```
<property>
<name>kerberos.principal</name>
<value>zeppelin@EXAMPLE.COM</value>
<description>The principal name to load from the keytab</description>
</property>
<property>
<name>kerberos.keytab</name>
<value>/etc/security/keytabs/zeppelin.server.kerberos.keytab</value>
<description>The path to the keytab file</description>
</property>

<property>
<name>kerberos.refresh.interval</name>
<value>1d</value>
<description>(optional) The refresh interval for Kerberos ticket. The default value is 1d.</description>
</property>
<property>
<name>kinit.fail.threshold</name>
<value>5</value>
<description>(optional) How many times should kinit retry. The default value is 5.</description>
</property>
```

</br>

## Notebook Storage in S3 <a name="S3"></a>

Notebooks may be stored in S3, and optionally encrypted. The [``DefaultAWSCredentialsProviderChain``](https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html) credentials provider is used for credentials and checks the following:
Expand Down
1 change: 1 addition & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@
<commons.logging.version>1.1.1</commons.logging.version>
<commons.cli.version>1.3.1</commons.cli.version>
<shiro.version>1.3.2</shiro.version>
<hadoop-common.version>2.6.0</hadoop-common.version>

<!-- test library versions -->
<junit.version>4.12</junit.version>
Expand Down
1 change: 0 additions & 1 deletion zeppelin-server/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
<!--library versions-->
<commons.httpclient.version>4.3.6</commons.httpclient.version>
<jersey.version>2.22.2</jersey.version>
<hadoop-common.version>2.6.0</hadoop-common.version>
<quartz.scheduler.version>2.2.1</quartz.scheduler.version>
<jersey.servlet.version>1.13</jersey.servlet.version>
<javax.ws.rsapi.version>2.0.1</javax.ws.rsapi.version>
Expand Down
121 changes: 121 additions & 0 deletions zeppelin-zengine/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,127 @@
<version>${project.version}</version>
</dependency>

<!--
Hadoop common library, pinned to ${hadoop-common.version}.
Each <exclusion> below stops Maven from pulling in that transitive artifact
through hadoop-common.
NOTE(review): presumably these are excluded to avoid clashing with versions
Zeppelin already ships - confirm before removing any entry.
-->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop-common.version}</version>
<exclusions>
<exclusion>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-core</artifactId>
</exclusion>
<exclusion>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-json</artifactId>
</exclusion>
<exclusion>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-server</artifactId>
</exclusion>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.jackrabbit</groupId>
<artifactId>jackrabbit-webdav</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
<exclusion>
<groupId>commons-httpclient</groupId>
<artifactId>commons-httpclient</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
<exclusion>
<groupId>org.eclipse.jgit</groupId>
<artifactId>org.eclipse.jgit</artifactId>
</exclusion>
<exclusion>
<groupId>com.jcraft</groupId>
<artifactId>jsch</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
</exclusion>
</exclusions>
</dependency>

<!--
Hadoop HDFS client library. It reuses ${hadoop-common.version} so that
hadoop-common and hadoop-hdfs always resolve to the same Hadoop release.
Each <exclusion> stops Maven from pulling in that transitive artifact
through hadoop-hdfs; every artifact is listed exactly once.
-->
<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-hdfs</artifactId>
  <version>${hadoop-common.version}</version>
  <exclusions>
    <exclusion>
      <groupId>com.sun.jersey</groupId>
      <artifactId>jersey-core</artifactId>
    </exclusion>
    <exclusion>
      <groupId>com.sun.jersey</groupId>
      <artifactId>jersey-json</artifactId>
    </exclusion>
    <exclusion>
      <groupId>com.sun.jersey</groupId>
      <artifactId>jersey-server</artifactId>
    </exclusion>
    <exclusion>
      <groupId>javax.servlet</groupId>
      <artifactId>servlet-api</artifactId>
    </exclusion>
    <exclusion>
      <groupId>org.apache.avro</groupId>
      <artifactId>avro</artifactId>
    </exclusion>
    <exclusion>
      <groupId>org.apache.jackrabbit</groupId>
      <artifactId>jackrabbit-webdav</artifactId>
    </exclusion>
    <exclusion>
      <groupId>io.netty</groupId>
      <artifactId>netty</artifactId>
    </exclusion>
    <exclusion>
      <groupId>commons-httpclient</groupId>
      <artifactId>commons-httpclient</artifactId>
    </exclusion>
    <exclusion>
      <groupId>org.apache.zookeeper</groupId>
      <artifactId>zookeeper</artifactId>
    </exclusion>
    <exclusion>
      <groupId>org.eclipse.jgit</groupId>
      <artifactId>org.eclipse.jgit</artifactId>
    </exclusion>
    <exclusion>
      <groupId>com.jcraft</groupId>
      <artifactId>jsch</artifactId>
    </exclusion>
    <exclusion>
      <groupId>org.apache.commons</groupId>
      <artifactId>commons-compress</artifactId>
    </exclusion>
    <exclusion>
      <groupId>xerces</groupId>
      <artifactId>xercesImpl</artifactId>
    </exclusion>
  </exclusions>
</dependency>

<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -519,10 +519,30 @@ public String getXxssProtection() {
return getString(ConfVars.ZEPPELIN_SERVER_X_XSS_PROTECTION);
}

/**
 * Looks up the Hadoop file system URI setting.
 *
 * @return the value resolved for {@code ConfVars.ZEPPELIN_HADOOP_URI}
 *     ({@code zeppelin.hadoop.uri}, declared default {@code hdfs://localhost:8020})
 */
public String getZeppelinHadoopUri() {
  final String hadoopUri = getString(ConfVars.ZEPPELIN_HADOOP_URI);
  return hadoopUri;
}

/**
 * Looks up the strict-transport setting of the server.
 *
 * @return the value resolved for {@code ConfVars.ZEPPELIN_SERVER_STRICT_TRANSPORT}
 *     ({@code zeppelin.server.strict.transport}, declared default {@code max-age=631138519})
 */
public String getStrictTransport() {
  final String strictTransport = getString(ConfVars.ZEPPELIN_SERVER_STRICT_TRANSPORT);
  return strictTransport;
}

/**
 * Looks up the Kerberos refresh interval setting.
 *
 * @return the value resolved for {@code ConfVars.KERBEROS_REFRESH_INTERVAL}
 *     ({@code kerberos.refresh.interval}, declared default {@code 1d})
 */
public String getKerberosRefreshInterval() {
  final String refreshInterval = getString(ConfVars.KERBEROS_REFRESH_INTERVAL);
  return refreshInterval;
}

/**
 * Looks up the kinit failure threshold setting as a string.
 *
 * <p>NOTE(review): {@code ConfVars.KINIT_FAIL_THRESHOLD} ({@code kinit.fail.threshold}) is
 * declared with an int default of 5 but is read here through {@code getString}. Confirm that
 * the string lookup actually yields "5" when the property is unset.
 *
 * @return the value resolved for {@code ConfVars.KINIT_FAIL_THRESHOLD}
 */
public String getKinitFailThreshold() {
  final String failThreshold = getString(ConfVars.KINIT_FAIL_THRESHOLD);
  return failThreshold;
}

/**
 * Looks up the Kerberos principal setting.
 *
 * @return the value resolved for {@code ConfVars.KERBEROS_PRINCIPAL}
 *     ({@code kerberos.principal}, declared default {@code null})
 */
public String getKerberosPrincipal() {
  final String principal = getString(ConfVars.KERBEROS_PRINCIPAL);
  return principal;
}

/**
 * Looks up the Kerberos keytab setting.
 *
 * @return the value resolved for {@code ConfVars.KERBEROS_KEYTAB}
 *     ({@code kerberos.keytab}, declared default {@code null})
 */
public String getKerberoskeyTab() {
  final String keytab = getString(ConfVars.KERBEROS_KEYTAB);
  return keytab;
}


public Map<String, String> dumpConfigurations(ZeppelinConfiguration conf,
ConfigurationKeyPredicate predicate) {
Expand Down Expand Up @@ -681,7 +701,12 @@ public static enum ConfVars {
ZEPPELIN_SERVER_XFRAME_OPTIONS("zeppelin.server.xframe.options", "SAMEORIGIN"),
ZEPPELIN_SERVER_JETTY_NAME("zeppelin.server.jetty.name", null),
ZEPPELIN_SERVER_STRICT_TRANSPORT("zeppelin.server.strict.transport", "max-age=631138519"),
ZEPPELIN_SERVER_X_XSS_PROTECTION("zeppelin.server.xxss.protection", "1");
ZEPPELIN_SERVER_X_XSS_PROTECTION("zeppelin.server.xxss.protection", "1"),
ZEPPELIN_HADOOP_URI("zeppelin.hadoop.uri", "hdfs://localhost:8020"),
KERBEROS_REFRESH_INTERVAL("kerberos.refresh.interval", "1d"),
KINIT_FAIL_THRESHOLD("kinit.fail.threshold", 5),
KERBEROS_PRINCIPAL("kerberos.principal", null),
KERBEROS_KEYTAB("kerberos.keytab", null);

private String varName;
@SuppressWarnings("rawtypes")
Expand Down
Loading