CSV Knife¶
A small command-line utility for applying focused CSV transformations.
The script reads UTF-8 CSV data with Python’s standard csv module.
Input can come from a file path or from standard input by passing -.
Output is written to standard output unless --output is provided.
Commands
reverseReverse CSV row order. By default, the first row is treated as a header and kept in place. Pass
--no-headerto reverse every row.remove-columnsRemove one or more columns by exact header name.
truncateShorten values longer than a configured character limit and append a suffix showing how many characters were removed.
For example, truncating
"abcdefghijklmnopqrstuvwxyz"to10characters produces:abcdefghij(...16 more)
The command also prints the maximum value length found in the input.
Usage
python csv_knife.py --help
python csv_knife.py reverse input.csv --output reversed.csv
python csv_knife.py remove-columns input.csv email phone --output public.csv
python csv_knife.py truncate input.csv 80 --output shortened.csv
from __future__ import annotations
import argparse
import csv
import sys
from pathlib import Path
from typing import Iterable, TextIO
- open_input(path: str) TextIO¶
Open a CSV input stream.
- Parameters:
path – File path to read, or
"-"to usesys.stdin.- Returns:
A readable text stream.
def open_input(path: str) -> TextIO:
if path == "-":
return sys.stdin
return Path(path).open("r", newline="", encoding="utf-8")
- open_output(path: str | None) TextIO¶
Open a CSV output stream.
- Parameters:
path – File path to write,
"-"forsys.stdout, orNoneto usesys.stdout.- Returns:
A writable text stream.
def open_output(path: str | None) -> TextIO:
if path in (None, "-"):
return sys.stdout
return Path(path).open("w", newline="", encoding="utf-8")
- read_csv(path: str) list[list[str]]¶
Read all rows from a CSV source.
- Parameters:
path – File path to read, or
"-"to read from standard input.- Returns:
CSV rows as a list of string lists.
def read_csv(path: str) -> list[list[str]]:
with open_input(path) as file:
return list(csv.reader(file))
- write_csv(rows: Iterable[Iterable[str]], output: str | None) None¶
Write rows to a CSV destination.
- Parameters:
rows – Row values to write.
output – File path to write,
"-"for standard output, orNonefor standard output.
def write_csv(rows: Iterable[Iterable[str]], output: str | None) -> None:
with open_output(output) as file:
writer = csv.writer(file)
writer.writerows(rows)
- reverse_csv(args: argparse.Namespace) None¶
Run the
reversesubcommand.The first row is preserved as a header unless
args.no_headeris true.- Parameters:
args – Parsed command-line arguments with
input,output, andno_headerattributes.
def reverse_csv(args: argparse.Namespace) -> None:
print(
f"Reversing CSV rows from {args.input} to {args.output or 'stdout'}",
file=sys.stderr,
)
rows = read_csv(args.input)
if args.no_header or not rows:
write_csv(reversed(rows), args.output)
return
header, data_rows = rows[0], rows[1:]
write_csv([header, *reversed(data_rows)], args.output)
- remove_columns(args: argparse.Namespace) None¶
Run the
remove-columnssubcommand.Columns are matched by exact header name. Rows shorter than the header are padded with empty strings for any retained column positions they do not contain.
- Parameters:
args – Parsed command-line arguments with
input,output, andcolumnsattributes.- Raises:
SystemExit – If any requested column is missing from the header.
def remove_columns(args: argparse.Namespace) -> None:
print(
f"Removing columns {args.columns} from CSV "
f"{args.input} to {args.output or 'stdout'}",
file=sys.stderr,
)
rows = read_csv(args.input)
if not rows:
write_csv([], args.output)
return
header = rows[0]
missing_columns = [column for column in args.columns if column not in header]
if missing_columns:
joined = ", ".join(missing_columns)
raise SystemExit(f"Unknown column(s): {joined}")
columns_to_remove = set(args.columns)
kept_indexes = [
index for index, column in enumerate(header) if column not in columns_to_remove
]
filtered_rows = (
[row[index] if index < len(row) else "" for index in kept_indexes]
for row in rows
)
write_csv(filtered_rows, args.output)
- truncate_value(value: str, limit: int) str¶
Return a value shortened to
limitcharacters when needed.Values longer than
limitare returned asvalue[:limit]followed by(...N more), whereNis the number of omitted characters.- Parameters:
value – Original CSV value.
limit – Maximum number of original characters to keep.
- Returns:
The original value or its truncated representation.
def truncate_value(value: str, limit: int) -> str:
remaining_chars = len(value) - limit
if remaining_chars <= 0:
return value
return f"{value[:limit]}(...{remaining_chars} more)"
- truncate_csv(args: argparse.Namespace) None¶
Run the
truncatesubcommand.The command prints the maximum value length found in the input to standard error, then writes the transformed CSV rows.
- Parameters:
args – Parsed command-line arguments with
input,output, andlimitattributes.
def truncate_csv(args: argparse.Namespace) -> None:
print(
f"Truncating CSV values from {args.input} "
f"to {args.output or 'stdout'} with limit {args.limit}",
file=sys.stderr,
)
rows = read_csv(args.input)
max_value_length = max((len(value) for row in rows for value in row), default=0)
print(f"Maximum value length: {max_value_length}", file=sys.stderr)
truncated_rows = (
[truncate_value(value, args.limit) for value in row]
for row in rows
)
write_csv(truncated_rows, args.output)
- build_parser() argparse.ArgumentParser¶
Build the top-level argument parser and subcommand parsers.
- Returns:
Configured
argparse.ArgumentParserinstance.
def build_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(
description="Process CSV files with small command-line transformations."
)
subparsers = parser.add_subparsers(dest="command", required=True)
reverse_parser = subparsers.add_parser(
"reverse", help="Reverse the order of CSV data rows."
)
reverse_parser.add_argument("input", help="Input CSV path, or '-' for stdin.")
reverse_parser.add_argument(
"-o", "--output", help="Output CSV path. Defaults to stdout."
)
reverse_parser.add_argument(
"--no-header",
action="store_true",
help="Reverse all rows instead of preserving the first row as a header.",
)
reverse_parser.set_defaults(func=reverse_csv)
remove_parser = subparsers.add_parser(
"remove-columns", help="Remove columns by header name."
)
remove_parser.add_argument("input", help="Input CSV path, or '-' for stdin.")
remove_parser.add_argument(
"columns", nargs="+", help="One or more column names to remove."
)
remove_parser.add_argument(
"-o", "--output", help="Output CSV path. Defaults to stdout."
)
remove_parser.set_defaults(func=remove_columns)
truncate_parser = subparsers.add_parser(
"truncate", help="Truncate values longer than the specified limit."
)
truncate_parser.add_argument("input", help="Input CSV path, or '-' for stdin.")
truncate_parser.add_argument(
"limit", type=int, help="Maximum number of original characters to keep."
)
truncate_parser.add_argument(
"-o", "--output", help="Output CSV path. Defaults to stdout."
)
truncate_parser.set_defaults(func=truncate_csv)
return parser
- main() None¶
Parse command-line arguments and dispatch to the selected subcommand.
def main() -> None:
parser = build_parser()
args = parser.parse_args()
if getattr(args, "limit", 0) < 0:
parser.error("limit must be greater than or equal to 0")
args.func(args)
if __name__ == "__main__":
main()