From 2b0ea8cf392377c507a2f2ad7f37663e012046bd Mon Sep 17 00:00:00 2001 From: Jotham Apaloo Date: Tue, 31 May 2016 11:40:46 -0700 Subject: [PATCH 1/5] Copy logs to s3 if client fails, rm unused imports --- learn/cli.py | 104 +++++++++++++++++++++++------------- learn/clients/discrating.py | 4 +- 2 files changed, 70 insertions(+), 38 deletions(-) diff --git a/learn/cli.py b/learn/cli.py index ac62534..4556be6 100644 --- a/learn/cli.py +++ b/learn/cli.py @@ -32,16 +32,13 @@ """ import os -import sys import shutil -from urlparse import urlparse, urljoin +from urlparse import urlparse import tempfile -from signal import * import logging import boto3 from docopt import docopt -from schema import Schema, And, Or, SchemaError, Optional from clients.varselect import VarSelect from clients.analyze import Analyze @@ -88,24 +85,23 @@ def cli(): infiles = files[command] - if (s3bucket) : + if s3bucket: logger.info("copying s3 data files locally") try: s3_client = boto3.client('s3') for infile in infiles: - logger.info("copying %s from S3" % infile) + logger.info("copying %s from S3", infile) url = urlparse(infile) file_path = os.path.join(tmpdir, infile.split('/')[-1]) s3_client.download_file(url.netloc, url.path.strip('/'), file_path) args[args.keys()[args.values().index(infile)]] = file_path except Exception as e: - # logger.error("error copying data from s3: %s %s", command, e) - exit(e) - else : + logger.error("error copying data from s3: %s %s", command, e) + exit(1) + else: for infile in infiles: shutil.copy(infile, tmpdir) - # rename the passed in filenames to the legacy filenames jic they where hardcoded legacy_config = 'XVARSELV1.csv' if command is 'varsel' else 'XVARSELV.csv' legacy = { @@ -114,10 +110,12 @@ def cli(): } if command in ['varsel', 'lda']: - os.rename(os.path.join(tmpdir, os.path.basename(args['--config'])), legacy['--config']) + os.rename(os.path.join(tmpdir, os.path.basename(args['--config'])), + legacy['--config']) args['--config'] = legacy['--config'] - os.rename(os.path.join(tmpdir, os.path.basename(args['--xy-data'])), legacy['--xy-data']) + os.rename(os.path.join(tmpdir, os.path.basename(args['--xy-data'])), + legacy['--xy-data']) args['--xy-data'] = legacy['--xy-data'] # make a list of the input filenames passsed in so we don't copy them to the output @@ -126,34 +124,68 @@ def cli(): try: client = clients[command]() client.run(args) - - # copy only output files to output dir - if(s3bucket): - logger.info("Copying results to s3 bucket %s to prefix %s", s3bucket, s3prefix) - for filename in os.listdir(tmpdir): - if filename not in infilenames: - filepath = os.path.join(tmpdir, filename) - key = "%s/%s" % (s3prefix, filename) - logger.info("Copying %s to %s", filepath, key) - s3_client.upload_file(filepath, s3bucket, key) - - for logfile in ['pylearn.log']: - logfile_path = os.path.join(os.getcwd(), logfile) - key = "%s/%s" % (s3prefix, logfile) - logger.info("Copying logfile %s to %s", logfile, key) - s3_client.upload_file(logfile_path, s3bucket, key) + if s3bucket: + copy_output_files_to_s3(s3_client, s3bucket, s3prefix, + tmpdir, infilenames, ['pylearn.log']) else: - logger.info("Copying results to %s", outdir) - - if not os.path.exists(outdir): - os.makedirs(outdir) - - for outfile in os.listdir(tmpdir): - if outfile not in infilenames: - shutil.copy(os.path.join(tmpdir, outfile), os.path.join(outdir, outfile)) + copy_output_files_to_folder(outdir, tmpdir, infilenames) exit(0) except Exception as e: logging.error(e) + if s3bucket: + copy_output_files_to_s3(s3_client, 
s3bucket, s3prefix, + tmpdir, infilenames, ['pylearn.log']) + else: + copy_output_files_to_folder(outdir, tmpdir, infilenames) + exit(1) + +def copy_output_files_to_s3(s3client, s3bucket, s3prefix, in_dir, + exclude_files, cwd_files): + """Copy files to s3 bucket + + :param s3bucket: s3 bucket URI, no trailing slash + :param s3prefix: folder, no leading slash + :param in_dir: directory from which to copy files + :param exclude: paths to exclude relative to dir + :cwd_files: additional files to copy from the working directory + + """ + logger.info("Copying results to s3 bucket %s to prefix %s", s3bucket, s3prefix) + for filename in os.listdir(in_dir): + if filename in exclude_files: + continue + filepath = os.path.join(in_dir, filename) + key = "%s/%s" % (s3prefix, filename) + logger.info("Copying %s to %s", filepath, key) + s3client.upload_file(filepath, s3bucket, key) + + for filename in cwd_files: + logfile_path = os.path.join(os.getcwd(), filename) + key = "%s/%s" % (s3prefix, filename) + logger.info("Copying logfile %s to %s", filename, key) + s3client.upload_file(logfile_path, s3bucket, key) + + +def copy_output_files_to_folder(destination, in_dir, exclude_files): + """Copy files to s3 bucket + + :param destination: destination directory for files + :param in_dir: directory from which to copy files + :param exclude_files: paths to exclude relative to dir + + """ + logger.info("Copying results to %s", destination) + + if not os.path.exists(destination): + os.makedirs(destination) + + for outfile in os.listdir(in_dir): + if outfile in exclude_files: + continue + shutil.copy(os.path.join(in_dir, outfile), os.path.join(destination, outfile)) + + + diff --git a/learn/clients/discrating.py b/learn/clients/discrating.py index d064f15..a0afbc3 100644 --- a/learn/clients/discrating.py +++ b/learn/clients/discrating.py @@ -1,13 +1,13 @@ import os import logging - import pandas as pd -from pylearn.varset import get_param from pylearn.discrating import predict + logger = logging.getLogger('pylearn') + class Discrating(object): def __init__(self): From 8aca8710c4f96f40a9b4983804877e53ae1b71d5 Mon Sep 17 00:00:00 2001 From: Jotham Apaloo Date: Tue, 31 May 2016 11:41:39 -0700 Subject: [PATCH 2/5] Remove legacy config handling closes #23 --- learn/cli.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/learn/cli.py b/learn/cli.py index 4556be6..e447736 100644 --- a/learn/cli.py +++ b/learn/cli.py @@ -102,22 +102,6 @@ def cli(): for infile in infiles: shutil.copy(infile, tmpdir) - legacy_config = 'XVARSELV1.csv' if command is 'varsel' else 'XVARSELV.csv' - - legacy = { - '--xy-data': os.path.join(tmpdir, 'ANALYSIS.csv'), - '--config': os.path.join(tmpdir, legacy_config) - } - - if command in ['varsel', 'lda']: - os.rename(os.path.join(tmpdir, os.path.basename(args['--config'])), - legacy['--config']) - args['--config'] = legacy['--config'] - - os.rename(os.path.join(tmpdir, os.path.basename(args['--xy-data'])), - legacy['--xy-data']) - args['--xy-data'] = legacy['--xy-data'] - # make a list of the input filenames passsed in so we don't copy them to the output infilenames = [os.path.basename(f) for f in os.listdir(tmpdir)] From 98ddbe892171c8418a67d20ab04b71ef22b7484b Mon Sep 17 00:00:00 2001 From: Jotham Apaloo Date: Tue, 31 May 2016 11:53:55 -0700 Subject: [PATCH 3/5] bump version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index f718d6d..f077e08 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name='learn-cli', - 
version='0.1.0', + version='0.1.1', description=u"Learn Model Builder", classifiers=[], keywords='', From 3e74a9d316d534ccb7b28b474b4d94930b1e75dd Mon Sep 17 00:00:00 2001 From: Jotham Apaloo Date: Tue, 31 May 2016 11:59:16 -0700 Subject: [PATCH 4/5] Push image for ecs/learn-app testing --- codeship-steps.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codeship-steps.yml b/codeship-steps.yml index cf7b660..e180eb7 100644 --- a/codeship-steps.yml +++ b/codeship-steps.yml @@ -7,7 +7,7 @@ dockercfg_service: dockercfg_generator - service: app type: push - tag: refactor/pandas + tag: enhancement/copy-fail-logs-to-s3 image_tag: dev image_name: 674223647607.dkr.ecr.us-east-1.amazonaws.com/tesera/learn registry: https://674223647607.dkr.ecr.us-east-1.amazonaws.com From ae3ceff4772bbc6522095f7d3c756fe3c56e9a85 Mon Sep 17 00:00:00 2001 From: Jotham Apaloo Date: Tue, 31 May 2016 12:44:33 -0700 Subject: [PATCH 5/5] Copy rlearn logs to s3 Ideally each client would handle this and only copy desire log files Or log file can be written to output directory --- learn/cli.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/learn/cli.py b/learn/cli.py index e447736..efe4578 100644 --- a/learn/cli.py +++ b/learn/cli.py @@ -84,7 +84,6 @@ def cli(): command = 'discrat' infiles = files[command] - if s3bucket: logger.info("copying s3 data files locally") try: @@ -104,13 +103,15 @@ def cli(): # make a list of the input filenames passsed in so we don't copy them to the output infilenames = [os.path.basename(f) for f in os.listdir(tmpdir)] + log_files = ['pylearn.log'] if command in ['describe', 'discrat'] \ + else ['pylearn.log', 'rlearn.log'] try: client = clients[command]() client.run(args) if s3bucket: copy_output_files_to_s3(s3_client, s3bucket, s3prefix, - tmpdir, infilenames, ['pylearn.log']) + tmpdir, infilenames, log_files) else: copy_output_files_to_folder(outdir, tmpdir, infilenames) @@ -120,7 +121,7 @@ def cli(): logging.error(e) if s3bucket: copy_output_files_to_s3(s3_client, s3bucket, s3prefix, - tmpdir, infilenames, ['pylearn.log']) + tmpdir, infilenames, log_files) else: copy_output_files_to_folder(outdir, tmpdir, infilenames) @@ -150,7 +151,11 @@ def copy_output_files_to_s3(s3client, s3bucket, s3prefix, in_dir, logfile_path = os.path.join(os.getcwd(), filename) key = "%s/%s" % (s3prefix, filename) logger.info("Copying logfile %s to %s", filename, key) - s3client.upload_file(logfile_path, s3bucket, key) + if os.path.exists(logfile_path): + s3client.upload_file(logfile_path, s3bucket, key) + else: + logger.warning('File %s cannot be copied to s3; file does not exist', + logfile_path) def copy_output_files_to_folder(destination, in_dir, exclude_files):
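
Taken together, the series makes the CLI ship its result files and its pylearn/rlearn logs to S3 even when the underlying client raises. Below is a minimal standalone sketch of that pattern; it assumes only boto3 and the standard library, the function names and parameters are illustrative rather than taken from learn/cli.py, and it folds the two copy calls into a single finally block instead of repeating them in the success and failure branches as the patches do.

import logging
import os

import boto3

logger = logging.getLogger('pylearn')


def upload_dir_to_s3(s3_client, bucket, prefix, in_dir, exclude_files):
    # Upload everything in in_dir to s3://bucket/prefix/, skipping the
    # original input files so only results are shipped.
    for filename in os.listdir(in_dir):
        if filename in exclude_files:
            continue
        filepath = os.path.join(in_dir, filename)
        if not os.path.isfile(filepath):
            continue
        key = "%s/%s" % (prefix, filename)
        logger.info("Copying %s to %s", filepath, key)
        s3_client.upload_file(filepath, bucket, key)


def run_and_ship(client, args, bucket, prefix, tmpdir, infilenames, log_files):
    # Run the client; copy outputs and logs whether it succeeds or fails.
    # The finally block is a variation on the patches, which instead call the
    # copy helper in both the success path and the except handler.
    s3_client = boto3.client('s3')
    try:
        client.run(args)
        return 0
    except Exception:
        logger.exception("client failed; outputs and logs will still be copied")
        return 1
    finally:
        upload_dir_to_s3(s3_client, bucket, prefix, tmpdir, infilenames)
        for logfile in log_files:
            logfile_path = os.path.join(os.getcwd(), logfile)
            if os.path.exists(logfile_path):
                s3_client.upload_file(logfile_path, bucket, "%s/%s" % (prefix, logfile))
            else:
                logger.warning("%s does not exist; skipping upload", logfile_path)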