# Source code for lmpy.tools.convert_csv_to_lmm

"""Convert a numeric (.csv) file to a lmpy Matrix (.lmm) file."""
import argparse
import json
from logging import WARN
import os

from lmpy.log import Logger
from lmpy.matrix import Matrix
from lmpy.tools._config_parser import _process_arguments, test_files

# Base name of this file with its extension stripped.  It is passed as the first
# argument to Logger and as `refname` in every log call made by this tool.
script_name = os.path.splitext(os.path.basename(__file__))[0]
# One-line summary of the tool, used as the argparse parser description.
DESCRIPTION = "Convert a CSV file of numerical values into a lmpy Matrix."


# .....................................................................................
def convert_csv_to_lmm(
    csv_filename, header_rows, header_cols, data_type_str, logger
):
    """Convert a CSV file to a lmpy lmm Matrix.

    Args:
        csv_filename (str): The file location of the csv to convert.
        header_rows (int): The number of header rows in the csv.
        header_cols (int): The number of header columns in the csv.
        data_type_str (str): The type of data in the matrix, float or int.
        logger (lmpy.log.Logger): object for writing log statements.

    Raises:
        ValueError: on data_type other than 'float' or 'int'.  ValueError is a
            subclass of Exception, so callers catching Exception still work.

    Returns:
        Matrix: A matrix from the converted csv file.
    """
    # Map the accepted type names onto the Python types handed to Matrix.load_csv.
    supported_types = {"float": float, "int": int}
    try:
        data_type = supported_types[data_type_str]
    except (KeyError, TypeError):
        # TypeError covers unhashable values, which are just as unsupported.
        raise ValueError(f"Unsupported data_type {data_type_str}") from None

    # The data type is validated before the file is opened, so a bad type never
    # touches the filesystem.
    with open(csv_filename, mode="rt") as csv_in:
        mtx = Matrix.load_csv(
            csv_in,
            dtype=data_type,
            num_header_rows=header_rows,
            num_header_cols=header_cols,
        )

    logger.log(
        f"Read CSV file {csv_filename} of data type `{data_type_str}`, "
        + f"with {header_rows} header rows indicating column metadata "
        + f"and {header_cols} header columns indicating row metadata ",
        refname=script_name,
    )
    return mtx
# .....................................................................................
def build_parser():
    """Build an argparse.ArgumentParser object for the tool.

    Returns:
        argparse.ArgumentParser: An argument parser for the tool's parameters.
    """
    # (flags, options) pairs, kept in the order the arguments are registered.
    argument_specs = (
        (
            ("--config_file",),
            {"type": str, "help": "Path to configuration file."},
        ),
        (
            ("--log_filename", "-l"),
            {"type": str, "help": "A file location to write logging data."},
        ),
        (
            ("--log_console",),
            {
                "action": "store_true",
                "default": False,
                "help": "If provided, write log to console.",
            },
        ),
        (
            ("-r", "--report_filename"),
            {
                "type": str,
                "help": "File location to write the wrangler report.",
            },
        ),
        (
            ("--data_type",),
            {
                "choices": ["float", "int"],
                "type": str,
                "default": "float",
                "help": (
                    "The data type of the values in the CSV, "
                    "options are `float` or `int`."
                ),
            },
        ),
        (
            ("--header_rows",),
            {
                "type": int,
                "default": 0,
                "help": "The number of header rows in the CSV.",
            },
        ),
        (
            ("--header_cols",),
            {
                "type": int,
                "default": 0,
                "help": "The number of header columns in the CSV.",
            },
        ),
        (
            ("in_csv_filename",),
            {"type": str, "help": "CSV filename to convert to lmm Matrix."},
        ),
        (
            ("out_lmm_filename",),
            {"type": str, "help": "Filename for .lmm Matrix."},
        ),
    )

    tool_parser = argparse.ArgumentParser(
        prog="convert_csv_to_lmm", description=DESCRIPTION
    )
    for flags, options in argument_specs:
        tool_parser.add_argument(*flags, **options)
    return tool_parser
# .....................................................................................
def test_inputs(args):
    """Test input data and configuration files for existence.

    Args:
        args: arguments pre-processed for this tool.

    Returns:
        all_missing_inputs: Error messages for display on exit.
    """
    # Pair the input path with the label passed alongside it to test_files.
    csv_input = (args.in_csv_filename, "CSV input")
    return test_files(csv_input)


# .....................................................................................
def cli():
    """Provide a command-line tool for converting csvs to lmms.

    Raises:
        SystemExit: when test_inputs reports errors; the exit message is the
            newline-joined list of those errors.
        OSError: on failure to write to report_filename (IOError is an alias of
            OSError in Python 3, so it is covered as well).
    """
    parser = build_parser()
    args = _process_arguments(parser, config_arg="config_file")
    errs = test_inputs(args)
    if errs:
        print("Errors, exiting program")
        # Raise SystemExit directly rather than calling the `exit` builtin: that
        # name is injected by the `site` module for interactive use and is not
        # guaranteed to exist (e.g. `python -S`).  The exception and its message
        # are the same as what `exit(...)` would raise.
        raise SystemExit("\n".join(errs))

    logger = Logger(
        script_name, log_filename=args.log_filename, log_console=args.log_console
    )
    logger.log(
        f"Beware: {script_name} has not been fully tested",
        refname=script_name,
        log_level=WARN,
    )

    mtx = convert_csv_to_lmm(
        args.in_csv_filename,
        args.header_rows,
        args.header_cols,
        args.data_type,
        logger,
    )
    (row_count, col_count) = mtx.shape
    mtx.write(args.out_lmm_filename)
    logger.log(
        f"Wrote into matrix {args.out_lmm_filename} containing "
        + f"{row_count} rows and {col_count} columns",
        refname=script_name,
    )

    # If the output report was requested, write it
    if args.report_filename:
        report = {
            "in_csv_filename": args.in_csv_filename,
            "header_rows": args.header_rows,
            "header_cols": args.header_cols,
            "out_lmm_filename": args.out_lmm_filename,
            "rows": row_count,
            "columns": col_count,
        }
        # Any OSError raised here propagates to the caller unchanged; no handler
        # is needed just to re-raise it.
        with open(args.report_filename, mode="wt") as out_file:
            json.dump(report, out_file, indent=4)
        logger.log(
            f"Wrote report file to {args.report_filename}", refname=script_name
        )
# .....................................................................................
# Names exported by `from lmpy.tools.convert_csv_to_lmm import *`.
__all__ = [
    "build_parser",
    "cli",
    "convert_csv_to_lmm",
]


# .....................................................................................
# Run the command-line tool when this file is executed as a script.
if __name__ == "__main__":  # pragma: no cover
    cli()