diff --git a/codeship-steps.yml b/codeship-steps.yml index cf7b660..e180eb7 100644 --- a/codeship-steps.yml +++ b/codeship-steps.yml @@ -7,7 +7,7 @@ dockercfg_service: dockercfg_generator - service: app type: push - tag: refactor/pandas + tag: enhancement/copy-fail-logs-to-s3 image_tag: dev image_name: 674223647607.dkr.ecr.us-east-1.amazonaws.com/tesera/learn registry: https://674223647607.dkr.ecr.us-east-1.amazonaws.com diff --git a/learn/cli.py b/learn/cli.py index ac62534..efe4578 100644 --- a/learn/cli.py +++ b/learn/cli.py @@ -32,16 +32,13 @@ """ import os -import sys import shutil -from urlparse import urlparse, urljoin +from urlparse import urlparse import tempfile -from signal import * import logging import boto3 from docopt import docopt -from schema import Schema, And, Or, SchemaError, Optional from clients.varselect import VarSelect from clients.analyze import Analyze @@ -87,73 +84,97 @@ def cli(): command = 'discrat' infiles = files[command] - - if (s3bucket) : + if s3bucket: logger.info("copying s3 data files locally") try: s3_client = boto3.client('s3') for infile in infiles: - logger.info("copying %s from S3" % infile) + logger.info("copying %s from S3", infile) url = urlparse(infile) file_path = os.path.join(tmpdir, infile.split('/')[-1]) s3_client.download_file(url.netloc, url.path.strip('/'), file_path) args[args.keys()[args.values().index(infile)]] = file_path except Exception as e: - # logger.error("error copying data from s3: %s %s", command, e) - exit(e) - else : + logger.error("error copying data from s3: %s %s", command, e) + exit(1) + else: for infile in infiles: shutil.copy(infile, tmpdir) - # rename the passed in filenames to the legacy filenames jic they where hardcoded - legacy_config = 'XVARSELV1.csv' if command is 'varsel' else 'XVARSELV.csv' - - legacy = { - '--xy-data': os.path.join(tmpdir, 'ANALYSIS.csv'), - '--config': os.path.join(tmpdir, legacy_config) - } - - if command in ['varsel', 'lda']: - os.rename(os.path.join(tmpdir, 
os.path.basename(args['--config'])), legacy['--config']) - args['--config'] = legacy['--config'] - - os.rename(os.path.join(tmpdir, os.path.basename(args['--xy-data'])), legacy['--xy-data']) - args['--xy-data'] = legacy['--xy-data'] - # make a list of the input filenames passsed in so we don't copy them to the output infilenames = [os.path.basename(f) for f in os.listdir(tmpdir)] + log_files = ['pylearn.log'] if command in ['describe', 'discrat'] \ + else ['pylearn.log', 'rlearn.log'] try: client = clients[command]() client.run(args) - - # copy only output files to output dir - if(s3bucket): - logger.info("Copying results to s3 bucket %s to prefix %s", s3bucket, s3prefix) - for filename in os.listdir(tmpdir): - if filename not in infilenames: - filepath = os.path.join(tmpdir, filename) - key = "%s/%s" % (s3prefix, filename) - logger.info("Copying %s to %s", filepath, key) - s3_client.upload_file(filepath, s3bucket, key) - - for logfile in ['pylearn.log']: - logfile_path = os.path.join(os.getcwd(), logfile) - key = "%s/%s" % (s3prefix, logfile) - logger.info("Copying logfile %s to %s", logfile, key) - s3_client.upload_file(logfile_path, s3bucket, key) + if s3bucket: + copy_output_files_to_s3(s3_client, s3bucket, s3prefix, + tmpdir, infilenames, log_files) else: - logger.info("Copying results to %s", outdir) - - if not os.path.exists(outdir): - os.makedirs(outdir) - - for outfile in os.listdir(tmpdir): - if outfile not in infilenames: - shutil.copy(os.path.join(tmpdir, outfile), os.path.join(outdir, outfile)) + copy_output_files_to_folder(outdir, tmpdir, infilenames) exit(0) except Exception as e: logging.error(e) + if s3bucket: + copy_output_files_to_s3(s3_client, s3bucket, s3prefix, + tmpdir, infilenames, log_files) + else: + copy_output_files_to_folder(outdir, tmpdir, infilenames) + exit(1) + +def copy_output_files_to_s3(s3client, s3bucket, s3prefix, in_dir, + exclude_files, cwd_files): + """Copy files to s3 bucket + + :param s3bucket: s3 bucket URI, no trailing 
slash + :param s3prefix: folder, no leading slash + :param in_dir: directory from which to copy files + :param exclude_files: paths to exclude relative to in_dir + :param cwd_files: additional files to copy from the working directory + + """ + logger.info("Copying results to s3 bucket %s to prefix %s", s3bucket, s3prefix) + for filename in os.listdir(in_dir): + if filename in exclude_files: + continue + filepath = os.path.join(in_dir, filename) + key = "%s/%s" % (s3prefix, filename) + logger.info("Copying %s to %s", filepath, key) + s3client.upload_file(filepath, s3bucket, key) + + for filename in cwd_files: + logfile_path = os.path.join(os.getcwd(), filename) + key = "%s/%s" % (s3prefix, filename) + logger.info("Copying logfile %s to %s", filename, key) + if os.path.exists(logfile_path): + s3client.upload_file(logfile_path, s3bucket, key) + else: + logger.warning('File %s cannot be copied to s3; file does not exist', + logfile_path) + + +def copy_output_files_to_folder(destination, in_dir, exclude_files): + """Copy files to a local destination folder + + :param destination: destination directory for files + :param in_dir: directory from which to copy files + :param exclude_files: paths to exclude relative to in_dir + + """ + logger.info("Copying results to %s", destination) + + if not os.path.exists(destination): + os.makedirs(destination) + + for outfile in os.listdir(in_dir): + if outfile in exclude_files: + continue + shutil.copy(os.path.join(in_dir, outfile), os.path.join(destination, outfile)) + + + diff --git a/learn/clients/discrating.py b/learn/clients/discrating.py index d064f15..a0afbc3 100644 --- a/learn/clients/discrating.py +++ b/learn/clients/discrating.py @@ -1,13 +1,13 @@ import os import logging - import pandas as pd -from pylearn.varset import get_param from pylearn.discrating import predict + logger = logging.getLogger('pylearn') + class Discrating(object): def __init__(self): diff --git a/setup.py b/setup.py index f718d6d..f077e08 100644 --- a/setup.py +++ b/setup.py @@ -2,7
+2,7 @@ setup( name='learn-cli', - version='0.1.0', + version='0.1.1', description=u"Learn Model Builder", classifiers=[], keywords='',