Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support SBT 1.xx and latest databricks #7

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 19 additions & 24 deletions build.sbt
Original file line number Diff line number Diff line change
@@ -1,41 +1,36 @@
name := "spark-tree-plotting"

version := "0.2"
version := "0.3"

scalaVersion := "2.11.12"
scalaVersion := "2.12.10"

libraryDependencies += "net.liftweb" % "lift-json_2.11" % "3.3.0"


assemblyShadeRules in assembly := Seq(
ShadeRule.rename("net.liftweb.json.**" -> "org.lift.web.library.json.@1").inAll
libraryDependencies ++= Seq(
"net.liftweb" % "lift-json_2.12" % "3.5.0",
"org.apache.spark" % "spark-core_2.12" % "3.1.0" % "provided",
"org.apache.spark" % "spark-mllib_2.12" % "3.1.0" % "provided"
)

// Spark Packages config
spName := "julioasotodv/spark-tree-plotting"

sparkVersion := "2.3.2"

sparkComponents += "mllib"

credentials += Credentials(Path.userHome / ".ivy2" / ".sbtcredentials")
lazy val spShortDescription = "A simple tool for plotting Spark ML's Decision Trees"

spShortDescription := "A simple tool for plotting Spark ML's Decision Trees"

spDescription := """This module provides a simple tool for plotting an easy to understand graphical representation
lazy val spDescription = """This module provides a simple tool for plotting an easy to understand graphical representation
|of Spark ML's DecisionTreeClassificationModels, very similar to the one Python's Scikit-Learn provides.
|Given a DecisionTreeClassificationModel, spark_tree_plotting generates a JSON file with
|the relevant metadata in order to plot the tree. Moreover, a simple JSON-to-DOT python
|script allows you to plot trees in PySpark in a very simple manner (just as in Scikit-Learn)""".stripMargin

licenses += "MIT" -> url("https://opensource.org/licenses/MIT")

spIncludeMaven := false


// Resulting name for the assembly jar
assemblyJarName in assembly := "spark-tree-plotting_0.2.jar"
assembly / assemblyJarName := { name.value + "-assembly-" + version.value + ".jar" }

// Do not include the Scala library itself in the jar
assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false)

assembly / assemblyOption := (assembly / assemblyOption).value.withIncludeScala(false)

assemblyMergeStrategy := {
case m if m.toLowerCase.endsWith("manifest.mf") => MergeStrategy.discard
case m if m.toLowerCase.matches("meta-inf.*\\.sf$") => MergeStrategy.discard
case "log4j.properties" => MergeStrategy.discard
case m if m.toLowerCase.startsWith("meta-inf/services/") => MergeStrategy.filterDistinctLines
case "reference.conf" => MergeStrategy.concat
case _ => MergeStrategy.first
}
2 changes: 1 addition & 1 deletion project/assembly.sbt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
resolvers += Resolver.sbtPluginRepo("releases")

addSbtPlugin("com.eed3si9n" %% "sbt-assembly" % "0.14.4")
addSbtPlugin("com.eed3si9n" %% "sbt-assembly" % "1.0.0")
2 changes: 1 addition & 1 deletion project/build.properties
Original file line number Diff line number Diff line change
@@ -1 +1 @@
sbt.version=0.13.17
sbt.version=1.6.2
3 changes: 0 additions & 3 deletions project/spark_packages.sbt

This file was deleted.

9 changes: 9 additions & 0 deletions python/spark_tree_plotting/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
A Simple tool for plotting Spark ML's Decision Trees
--
If you would like to use the API, you can install the package:

python setup.py install

Create wheel package
--
python setup.py bdist_wheel
121 changes: 121 additions & 0 deletions python/spark_tree_plotting/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Appreciatively cargo-culted from: https://github.com/kennethreitz/setup.py

# Note: To use the 'upload' functionality of this file, you must:
# $ pip install twine

import io
import os
import sys
from shutil import rmtree

from setuptools import find_packages, setup, Command

# Package meta-data.
NAME = 'spark_tree_plotting'
DESCRIPTION = 'A simple tool for plotting Spark ML Decision Trees'
URL = 'https://github.com/julioasotodv/spark-tree-plotting'
# NOTE(review): this value looks like an obfuscated placeholder rather than a
# real address -- confirm the maintainer e-mail before publishing.
EMAIL = '[email protected]'
AUTHOR = 'Julio Antonio Soto'

# Runtime dependencies installed together with the package.
REQUIRED = ['pydot']

# The rest you shouldn't have to touch too much :)
# ------------------------------------------------
# Except, perhaps the License and Trove Classifiers!
# If you do change the License, remember to change the Trove Classifier for that!

# Absolute path of the directory that contains this setup.py.
here = os.path.abspath(os.path.dirname(__file__))

# Use the README as the long description (prefixed with a newline).
# Note: this will only work if 'README.md' is present in your MANIFEST.in file!
with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as readme_file:
    long_description = '\n' + readme_file.read()

# Execute the package's __version__.py and collect the names it defines in a
# dictionary, so the version can be read without importing the package.
about = {}
with open(os.path.join(here, NAME, '__version__.py')) as version_file:
    version_source = version_file.read()
exec(version_source, about)


class UploadCommand(Command):
    """Support setup.py upload.

    Removes any previous ``dist`` directory, builds the source and universal
    wheel distributions, and uploads them with Twine. The process exits with
    the status of the first step that fails, or with Twine's status.
    """

    description = 'Build and publish the package.'
    user_options = []

    @staticmethod
    def status(s):
        """Prints things in bold."""
        print('\033[1m{0}\033[0m'.format(s))

    def initialize_options(self):
        """Nothing to initialise; the command takes no options."""
        pass

    def finalize_options(self):
        """Nothing to finalise; the command takes no options."""
        pass

    def run(self):
        """Clean, build and upload; never returns (always calls sys.exit)."""
        # Imported locally so the module's import block stays untouched.
        import glob
        import subprocess

        try:
            self.status('Removing previous builds…')
            rmtree(os.path.join(here, 'dist'))
        except OSError:
            # No previous build directory to remove.
            pass

        self.status('Building Source and Wheel (universal) distribution…')
        # An argument list (no shell) keeps interpreter paths with spaces
        # working; cwd=here matches the directory cleaned above.
        build_status = subprocess.call(
            [sys.executable, 'setup.py', 'sdist', 'bdist_wheel', '--universal'],
            cwd=here
        )
        if build_status != 0:
            # Do not upload stale or missing artifacts after a failed build.
            sys.exit(build_status)

        self.status('Uploading the package to PyPi via Twine…')
        # Expand dist/* ourselves because no shell is involved.
        artifacts = glob.glob(os.path.join(here, 'dist', '*'))
        try:
            upload_status = subprocess.call(['twine', 'upload'] + artifacts, cwd=here)
        except OSError:
            sys.exit('Could not run twine; install it with: pip install twine')

        sys.exit(upload_status)


# Where the magic happens:
# Registers the package with setuptools using the metadata constants defined
# earlier in this file; the version comes from the package's __version__.py.
setup(
    name=NAME,
    version=about['__version__'],
    description=DESCRIPTION,
    long_description=long_description,
    author=AUTHOR,
    author_email=EMAIL,
    url=URL,
    packages=find_packages(exclude=('tests',)),
    # If your package is a single module, use this instead of 'packages':
    # py_modules=['mypackage'],

    # entry_points={
    #     'console_scripts': ['mycli=mymodule:cli'],
    # },

    test_suite='spark_tree_plotting.tests',

    install_requires=REQUIRED,
    include_package_data=True,
    # Kept in sync with the MIT license declared in the project's build.sbt.
    license='MIT',
    classifiers=[
        # Trove classifiers
        # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: Implementation :: CPython',
        'Programming Language :: Python :: Implementation :: PyPy',
        'Development Status :: 3 - Alpha',
        'Topic :: Scientific/Engineering',
    ],
    # $ setup.py upload support.
    cmdclass={
        'upload': UploadCommand,
    },
)
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__version__ = '0.3.0'
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ def plot_tree(DecisionTreeClassificationModel, featureNames=None, categoryNames=
from pydot import graph_from_dot_data
except ImportError:
raise ImportError(
"This function requires pydot3 dot be installed. You can easily install it with pip install pydot3"
"This function requires pydot dot be installed. You can easily install it with pip install pydot"
)

graph = graph_from_dot_data(export_graphviz(DecisionTreeClassificationModel,
Expand All @@ -441,4 +441,4 @@ def plot_tree(DecisionTreeClassificationModel, featureNames=None, categoryNames=
else:
plot = graph.create_png()
return plot