Files
bildverarbeitungss18-hausau…/nb_strip_output.py
2018-04-18 13:03:26 +02:00

111 lines
3.2 KiB
Python

#! /usr/bin/env python3
"""
Strip the output of IPython notebooks.

Add this as `.git/hooks/pre-commit` to run it every time you commit a
notebook.

Opens a notebook, strips its output, and writes the outputless version to the
original file. Useful mainly as a git filter or pre-commit hook for users who
don't want to track output in VCS.

This does mostly the same thing as the `Clear All Output` command in the
notebook UI.
LICENSE: Public Domain
Adapted from: https://gist.github.com/minrk/6176788
"""
import subprocess
from subprocess import PIPE, Popen
import io
import argparse
import os
# Resolve the notebook read/write API, trying the newest package layout first.
try:
    # Jupyter >= 4
    from nbformat import read, write, NO_CONVERT
except ImportError:
    # IPython 3
    try:
        from IPython.nbformat import read, write, NO_CONVERT
    except ImportError:
        # IPython < 3
        from IPython.nbformat import current

        # This branch imports no NO_CONVERT, yet strip_output() passes
        # `as_version=NO_CONVERT`; define a placeholder so that call does not
        # fail with a NameError. The shim below ignores the value.
        NO_CONVERT = None

        def read(f, as_version):
            """Read a notebook from the open file `f` as JSON.

            `as_version` is accepted only for signature compatibility with
            the newer API and is ignored.
            """
            return current.read(f, 'json')

        def write(nb, f):
            """Write the notebook `nb` to the open file `f` as JSON."""
            return current.write(nb, f, 'json')
def get_rev():
    """Return the revision that staged changes should be compared against.

    Returns:
        'HEAD' when `git rev-parse --verify HEAD` exits with status 0,
        otherwise the id of git's empty tree object, so that a diff is still
        possible when there is no HEAD to compare with.
    """
    command = ["git", "rev-parse", "--verify", "HEAD"]
    # Only the exit status is of interest. The streams are sent to DEVNULL
    # rather than PIPE because subprocess.call() never reads from the pipes.
    status = subprocess.call(
        command,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    if status == 0:
        return 'HEAD'
    # Well-known object id of git's empty tree.
    return '4b825dc642cb6eb9a060e54bf8d69288fbee4904'
def get_notebooks(against):
    """Return the names of staged `.ipynb` files that differ from `against`.

    Args:
        against: revision or tree id handed to `git diff-index --cached`.

    Returns:
        A set of file names (str, decoded as UTF-8) ending in ".ipynb".

    Raises:
        AssertionError: if the git command exits with a non-zero status.
    """
    # Built as an argument list so `against` is always passed as one argument.
    command = ["git", "diff-index", "-z", "--cached", against, "--name-only"]
    p = Popen(command, stdin=PIPE, stdout=PIPE, stderr=PIPE)
    output, err = p.communicate()
    if p.returncode != 0:
        # Raised explicitly (instead of via `assert`) so the check is not
        # stripped when Python runs with -O.
        raise AssertionError(
            "Command failed: {}\nstdout:\n{}\nstderr:\n{}"
            .format(" ".join(command), output, err))
    # With -z the names are NUL-separated; the empty piece after the final
    # NUL is dropped by the suffix filter below.
    names = (raw_name.decode('utf8') for raw_name in output.split(b'\x00'))
    return {name for name in names if name.endswith(".ipynb")}
def run_as_git_hook():
    """Strip the output of every staged notebook, then run git's check.

    The notebooks are the staged `.ipynb` files reported by get_notebooks()
    relative to the revision chosen by get_rev(). Each one is rewritten in
    place by strip_output(). Afterwards `git diff-index --check --cached` is
    run against the same revision.
    """
    against = get_rev()
    for notebook_fname in sorted(get_notebooks(against)):
        # A staged deletion is still reported by `git diff-index`, but there
        # is no file left to rewrite; skip it instead of crashing on open.
        if not os.path.exists(notebook_fname):
            continue
        print("Stripping output from: {}".format(notebook_fname))
        strip_output(notebook_fname)
    command = ["git", "diff-index", "--check", "--cached", against, "--"]
    # NOTE(review): the exit status of the check is discarded, so a failing
    # check does not change this script's exit code -- confirm that is
    # intended for a pre-commit hook.
    subprocess.call(command)
def _cells(nb):
"""Yield all cells in an nbformat-insensitive manner"""
if nb.nbformat < 4:
for ws in nb.worksheets:
for cell in ws.cells:
yield cell
else:
for cell in nb.cells:
yield cell
def strip_output(filename):
    """Strip the outputs from the notebook stored at `filename`, in place.

    The notebook is read without version conversion, its `signature`
    metadata entry is removed, every cell's `outputs` list is emptied and its
    execution counter is reset, and the result is written back to the same
    file.

    Args:
        filename: path of the notebook file to rewrite (read and written as
            UTF-8).
    """
    with io.open(filename, 'r', encoding='utf8') as f:
        nb = read(f, as_version=NO_CONVERT)
    nb.metadata.pop('signature', None)
    for cell in _cells(nb):
        if 'outputs' in cell:
            cell['outputs'] = []
        # Older notebook formats store the counter as 'prompt_number';
        # nbformat 4 stores it as 'execution_count'. Reset whichever exists
        # so the counter does not show up as a change in version control.
        for counter_key in ('prompt_number', 'execution_count'):
            if counter_key in cell:
                cell[counter_key] = None
    with io.open(filename, 'w', encoding='utf8') as f:
        write(nb, f)
if __name__ == "__main__":
    # With no argument the script acts as a git hook; with a file argument it
    # strips just that notebook.
    parser = argparse.ArgumentParser(
        description='Strips the output from a notebook')
    parser.add_argument('file', type=str, default='', nargs='?',
                        help='remove the output from this notebook')
    args = parser.parse_args()
    if args.file == '':
        run_as_git_hook()
    else:
        fname = args.file
        if not os.path.exists(fname):
            print("File {} does not exists.".format(fname))
            # Stop with a failing exit status instead of falling through to
            # strip_output(), which would crash with a traceback.
            raise SystemExit(1)
        strip_output(fname)