CSV Knife

A small command-line utility for applying focused CSV transformations.

The script reads UTF-8 CSV data with Python’s standard csv module. Input can come from a file path or from standard input by passing -. Output is written to standard output unless --output is provided.

Commands

reverse

Reverse CSV row order. By default, the first row is treated as a header and kept in place. Pass --no-header to reverse every row.

remove-columns

Remove one or more columns by exact header name.

truncate

Shorten values longer than a configured character limit and append a suffix showing how many characters were removed.

For example, truncating "abcdefghijklmnopqrstuvwxyz" to 10 characters produces:

abcdefghij(...16 more)

The command also prints the maximum value length found in the input.

Usage

python csv_knife.py --help
python csv_knife.py reverse input.csv --output reversed.csv
python csv_knife.py remove-columns input.csv email phone --output public.csv
python csv_knife.py truncate input.csv 80 --output shortened.csv

from __future__ import annotations

import argparse
import csv
import sys
from pathlib import Path
from typing import Iterable, TextIO
open_input(path: str) TextIO

Open a CSV input stream.

Parameters:

path – File path to read, or "-" to use sys.stdin.

Returns:

A readable text stream.

def open_input(path: str) -> TextIO:
    if path == "-":
        return sys.stdin
    return Path(path).open("r", newline="", encoding="utf-8")
open_output(path: str | None) TextIO

Open a CSV output stream.

Parameters:

path – File path to write, "-" for sys.stdout, or None to use sys.stdout.

Returns:

A writable text stream.

def open_output(path: str | None) -> TextIO:
    if path in (None, "-"):
        return sys.stdout
    return Path(path).open("w", newline="", encoding="utf-8")
read_csv(path: str) list[list[str]]

Read all rows from a CSV source.

Parameters:

path – File path to read, or "-" to read from standard input.

Returns:

CSV rows as a list of string lists.

def read_csv(path: str) -> list[list[str]]:
    with open_input(path) as file:
        return list(csv.reader(file))
write_csv(rows: Iterable[Iterable[str]], output: str | None) None

Write rows to a CSV destination.

Parameters:
  • rows – Row values to write.

  • output – File path to write, "-" for standard output, or None for standard output.

def write_csv(rows: Iterable[Iterable[str]], output: str | None) -> None:
    with open_output(output) as file:
        writer = csv.writer(file)
        writer.writerows(rows)
reverse_csv(args: argparse.Namespace) None

Run the reverse subcommand.

The first row is preserved as a header unless args.no_header is true.

Parameters:

args – Parsed command-line arguments with input, output, and no_header attributes.

def reverse_csv(args: argparse.Namespace) -> None:
    print(
        f"Reversing CSV rows from {args.input} to {args.output or 'stdout'}",
        file=sys.stderr,
    )
    rows = read_csv(args.input)
    if args.no_header or not rows:
        write_csv(reversed(rows), args.output)
        return

    header, data_rows = rows[0], rows[1:]
    write_csv([header, *reversed(data_rows)], args.output)
remove_columns(args: argparse.Namespace) None

Run the remove-columns subcommand.

Columns are matched by exact header name. Rows shorter than the header are padded with empty strings for any retained column positions they do not contain.

Parameters:

args – Parsed command-line arguments with input, output, and columns attributes.

Raises:

SystemExit – If any requested column is missing from the header.

def remove_columns(args: argparse.Namespace) -> None:
    print(
        f"Removing columns {args.columns} from CSV "
        f"{args.input} to {args.output or 'stdout'}",
        file=sys.stderr,
    )
    rows = read_csv(args.input)
    if not rows:
        write_csv([], args.output)
        return

    header = rows[0]
    missing_columns = [column for column in args.columns if column not in header]
    if missing_columns:
        joined = ", ".join(missing_columns)
        raise SystemExit(f"Unknown column(s): {joined}")

    columns_to_remove = set(args.columns)
    kept_indexes = [
        index for index, column in enumerate(header) if column not in columns_to_remove
    ]
    filtered_rows = (
        [row[index] if index < len(row) else "" for index in kept_indexes]
        for row in rows
    )
    write_csv(filtered_rows, args.output)
truncate_value(value: str, limit: int) str

Return a value shortened to limit characters when needed.

Values longer than limit are returned as value[:limit] followed by (...N more), where N is the number of omitted characters.

Parameters:
  • value – Original CSV value.

  • limit – Maximum number of original characters to keep.

Returns:

The original value or its truncated representation.

def truncate_value(value: str, limit: int) -> str:
    remaining_chars = len(value) - limit
    if remaining_chars <= 0:
        return value
    return f"{value[:limit]}(...{remaining_chars} more)"
truncate_csv(args: argparse.Namespace) None

Run the truncate subcommand.

The command prints the maximum value length found in the input to standard error, then writes the transformed CSV rows.

Parameters:

args – Parsed command-line arguments with input, output, and limit attributes.

def truncate_csv(args: argparse.Namespace) -> None:
    print(
        f"Truncating CSV values from {args.input} "
        f"to {args.output or 'stdout'} with limit {args.limit}",
        file=sys.stderr,
    )
    rows = read_csv(args.input)
    max_value_length = max((len(value) for row in rows for value in row), default=0)
    print(f"Maximum value length: {max_value_length}", file=sys.stderr)

    truncated_rows = (
        [truncate_value(value, args.limit) for value in row]
        for row in rows
    )
    write_csv(truncated_rows, args.output)
build_parser() argparse.ArgumentParser

Build the top-level argument parser and subcommand parsers.

Returns:

Configured argparse.ArgumentParser instance.

def build_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(
        description="Process CSV files with small command-line transformations."
    )
    subparsers = parser.add_subparsers(dest="command", required=True)

    reverse_parser = subparsers.add_parser(
        "reverse", help="Reverse the order of CSV data rows."
    )
    reverse_parser.add_argument("input", help="Input CSV path, or '-' for stdin.")
    reverse_parser.add_argument(
        "-o", "--output", help="Output CSV path. Defaults to stdout."
    )
    reverse_parser.add_argument(
        "--no-header",
        action="store_true",
        help="Reverse all rows instead of preserving the first row as a header.",
    )
    reverse_parser.set_defaults(func=reverse_csv)

    remove_parser = subparsers.add_parser(
        "remove-columns", help="Remove columns by header name."
    )
    remove_parser.add_argument("input", help="Input CSV path, or '-' for stdin.")
    remove_parser.add_argument(
        "columns", nargs="+", help="One or more column names to remove."
    )
    remove_parser.add_argument(
        "-o", "--output", help="Output CSV path. Defaults to stdout."
    )
    remove_parser.set_defaults(func=remove_columns)

    truncate_parser = subparsers.add_parser(
        "truncate", help="Truncate values longer than the specified limit."
    )
    truncate_parser.add_argument("input", help="Input CSV path, or '-' for stdin.")
    truncate_parser.add_argument(
        "limit", type=int, help="Maximum number of original characters to keep."
    )
    truncate_parser.add_argument(
        "-o", "--output", help="Output CSV path. Defaults to stdout."
    )
    truncate_parser.set_defaults(func=truncate_csv)

    return parser
main() None

Parse command-line arguments and dispatch to the selected subcommand.

def main() -> None:
    parser = build_parser()
    args = parser.parse_args()
    if getattr(args, "limit", 0) < 0:
        parser.error("limit must be greater than or equal to 0")
    args.func(args)


if __name__ == "__main__":
    main()