# Build from the Dep directory: publish the artifacts to the local
# repository, then build the assembly jar.
# The steps are chained with && so that a failed `cd` (or a failed publish)
# stops the sequence instead of running sbt in the wrong directory.
cd Dep &&
  sbt clean publishLocal &&
  sbt assembly
# Use any Spark version compatible with the Scala version used in the project.
# Create the directory for Spark event logs (default location:
# file:/tmp/spark-events). -p keeps this step idempotent: a plain mkdir
# fails when the directory already exists, e.g. on a second run.
mkdir -p /tmp/spark-events
# Start the Spark History Server. The path is quoted so that a SPARK_HOME
# containing spaces is not word-split.
bash "$SPARK_HOME/sbin/start-history-server.sh"
# Submit the analyzer through the project's submit wrapper script.
# NOTE(review): submit.sh is not shown here, so the argument meaning is
# inferred from the values - presumably: application jar, Spark master,
# start date, end date, aggregation interval, input directory, output
# directory. Confirm against the script.
# 'local[2]' is quoted because unquoted brackets form a glob pattern: a file
# named "local2" in the working directory would silently replace the
# argument, and shells such as zsh abort on an unmatched pattern.
bash scripts/generic/submit.sh \
  target/scala-2.12/AlgoSEDD-assembly-0.1-SNAPSHOT.jar \
  'local[2]' \
  2010-01-01 \
  2022-01-01 \
  '8 weeks' \
  /mnt/datastore/data/StackExchangeDataDump/2021-06-07/3dprinting.meta.stackexchange.com \
  target/output/
# Use any Spark version compatible with the Scala version used in the project.
# Submit the analyzer as a Spark job to a Dataproc cluster.
# Everything after the bare `--` is handed to the job itself rather than to
# gcloud: two dates, an interval, eight XML inputs and a final output path.
# NOTE(review): the roles of the job arguments are read off their values;
# confirm the expected order against the analyzer's main class.
gcloud dataproc jobs submit spark \
  --cluster=cluster-5cff \
  --class=pl.epsilondeltalimit.analyzer.StackExchangeDataDumpAnalyzerSingle \
  --jars=gs://stack-exchange-data-dump-analyzer-single/StackExchangeDataDumpAnalyzerSingle-0.1-SNAPSHOT-jar-with-dependencies.jar \
  --region=europe-west3 \
  --driver-log-levels root=DEBUG \
  -- \
  2010-01-01 \
  2021-01-01 \
  '13 weeks' \
  gs://stack-exchange-data-dump/scifi.stackexchange.com/Badges.xml \
  gs://stack-exchange-data-dump/scifi.stackexchange.com/Comments.xml \
  gs://stack-exchange-data-dump/scifi.stackexchange.com/PostHistory.xml \
  gs://stack-exchange-data-dump/scifi.stackexchange.com/PostLinks.xml \
  gs://stack-exchange-data-dump/scifi.stackexchange.com/Posts.xml \
  gs://stack-exchange-data-dump/scifi.stackexchange.com/Tags.xml \
  gs://stack-exchange-data-dump/scifi.stackexchange.com/Users.xml \
  gs://stack-exchange-data-dump/scifi.stackexchange.com/Votes.xml \
  gs://stack-exchange-data-dump-analyzer-single/output/scifi.stackexchange.com/13weeks
# relative popularity
#bash scripts/plot/generic/plot_relative_popularity_tag.sh \
# <csv result file> \
# <tag name> \
# <aggregation interval> \
# <y axis max> \
# <optional fifth argument, e.g. 0.1 as in the example below>
#e.g.
#bash scripts/plot/generic/plot_relative_popularity_tag.sh \
# output/tag\=print-quality/part-00000-9b6e8399-3e48-4a97-a355-4b239b975515.c000.csv \
# print-quality \
# 8weeks \
# 1.0 \
# 0.1
# Entries count: plot it for a single tag from one result CSV.
# NOTE(review): the plot script is not shown here; the arguments appear to be
# result CSV file, tag name, aggregation interval and y-axis maximum - confirm.
bash scripts/plot/generic/plot_entries_count_tag.sh \
  'target/output/scifi.stackexchange.com/tag=star-wars/part-00000-28150a61-065b-4e24-8435-3b71b4911bcf.c000.csv' \
  star-wars \
  8weeks \
  100000