Split Log

This script splits a given log file into multiple output files at every line that contains one of the signatures listed in a text file (split_log.txt by default).

Behavior

  • The input log is processed line by line.

  • Log contents are written to the current output file until a matching signature is found.

  • When a signature is found:

    • the current output file is closed;

    • a new output file is created;

    • the line containing the signature is written as the first line of the new output file.

Output Files

Output files are named using a three-digit sequence number followed by the original log file name:

001-{log_file}
002-{log_file}
003-{log_file}

For example, if the input log file is named application.log, the generated files will be:

001-application.log
002-application.log
003-application.log

Signature File

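By default, the list of signatures is read from a text file named split_log.txt in the current working directory; use the -s/--signatures option to point to a different file.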

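The file contains one signature per line; empty lines are ignored. Each signature is used to determine where the log should be split: a log line that contains any signature as a substring starts a new output file.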


import argparse
from pathlib import Path
load_signatures(signature_file: Path) list[str]
def load_signatures(signature_file: Path) -> list[str]:
    """Read the split signatures from *signature_file*, one per line.

    Only the trailing newline is removed from each line, so leading and
    trailing spaces that belong to a signature are preserved. Lines that
    are empty or consist solely of whitespace are skipped: a blank-looking
    signature would match almost every log line.

    Args:
        signature_file: Path to the UTF-8 text file holding the signatures.

    Returns:
        The signatures in file order. The list is empty when the file
        contains no usable line.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # "utf-8-sig" decodes plain UTF-8 too, but drops a leading byte-order
    # mark; otherwise the BOM would stay glued to the first signature and
    # that signature would never match any log line.
    with signature_file.open("r", encoding="utf-8-sig") as f:
        candidates = (line.rstrip("\n") for line in f)
        return [signature for signature in candidates if signature.strip()]
line_contains_signature(line: str, signatures: list[str]) bool
def line_contains_signature(line: str, signatures: list[str]) -> bool:
    """Report whether *line* contains at least one signature as a substring.

    Args:
        line: A single line of the log, including its line terminator.
        signatures: Candidate substrings to look for.

    Returns:
        True as soon as one signature is found in the line, False when
        none match (including when *signatures* is empty).
    """
    for candidate in signatures:
        if candidate in line:
            return True
    return False
output_path(output_dir: Path, index: int, log_file: Path) Path
def output_path(output_dir: Path, index: int, log_file: Path) -> Path:
    """Build the path of the numbered part file for *log_file*.

    Args:
        output_dir: Directory that will contain the part file.
        index: Sequence number of the part; zero-padded to at least three
            digits in the file name.
        log_file: The log being split; only its final path component is
            used.

    Returns:
        ``output_dir / "<index>-<log file name>"``, e.g. ``001-application.log``.
    """
    sequence = format(index, "03d")
    part_name = sequence + "-" + log_file.name
    return output_dir.joinpath(part_name)
split_log(log_file: Path, signature_file: Path, output_dir: Path)
def split_log(log_file: Path, signature_file: Path, output_dir: Path) -> None:
    """Split *log_file* into numbered part files at every signature line.

    The log is read line by line and copied to the current part file. When
    a line contains any signature, the current part is closed, the next
    numbered part is opened, and that line becomes the first line of the
    new part. The path of every part is printed as it is started.

    Part 001 is always created and receives everything before the first
    signature line; it is empty when the very first log line already
    contains a signature.

    Args:
        log_file: Log to split. Decoded as UTF-8; undecodable bytes are
            replaced rather than raising.
        signature_file: Text file with one signature per line.
        output_dir: Directory for the part files; created if missing.

    Raises:
        ValueError: If the signature file yields no signatures.
        OSError: If the log, the signature file, or a part file cannot be
            opened.
    """
    signatures = load_signatures(signature_file)

    if not signatures:
        raise ValueError(f"No signatures found in {signature_file}")

    # Open the input before touching the output location, so a missing or
    # unreadable log fails without leaving a new directory or an empty
    # 001-... file behind.
    # newline="" on both sides disables newline translation: each line is
    # written back with the exact terminator it had in the original log.
    with log_file.open("r", encoding="utf-8", errors="replace", newline="") as log:
        output_dir.mkdir(parents=True, exist_ok=True)

        part_index = 1
        current_output = output_path(output_dir, part_index, log_file)
        out = current_output.open("w", encoding="utf-8", newline="")

        try:
            print(current_output)

            for line in log:
                if line_contains_signature(line, signatures):
                    # Roll over: the matching line starts the next part.
                    out.close()

                    part_index += 1
                    current_output = output_path(output_dir, part_index, log_file)
                    print(current_output)
                    out = current_output.open("w", encoding="utf-8", newline="")

                out.write(line)
        finally:
            # Closing an already-closed file is a no-op, so this is safe
            # even when opening the next part failed after the close above.
            out.close()
main()
def main() -> None:
    """Command-line entry point: parse the arguments and split the log.

    Takes the log path as a positional argument, plus optional
    ``-s/--signatures`` (default ``split_log.txt``) and ``-o/--output-dir``
    (default ``.``), then delegates to ``split_log``.
    """
    cli = argparse.ArgumentParser(
        description="Split a log file into numbered parts using signatures from split_log.txt."
    )
    cli.add_argument("log_file", help="Path to the log file to split.")
    cli.add_argument(
        "-s",
        "--signatures",
        default="split_log.txt",
        help="Path to the signatures file. Default: split_log.txt",
    )
    cli.add_argument(
        "-o",
        "--output-dir",
        default=".",
        help="Directory where split files will be written. Default: current directory",
    )

    options = cli.parse_args()

    # Convert the raw strings to paths once, before handing them over.
    log_path = Path(options.log_file)
    signatures_path = Path(options.signatures)
    destination = Path(options.output_dir)

    split_log(
        log_file=log_path,
        signature_file=signatures_path,
        output_dir=destination,
    )


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
  main()