"""Fix notebooks in the subdirectories of the directory ``baseDir``.

The notebooks concerned are those affected by the problem described in
https://github.com/jupyter/notebook/issues/5292
The core of the code is taken from one of the comments on that issue.
"""
import json
import requests
from webdav3.client import Client
# Server-side directory whose subdirectories are scanned for notebooks.
# It is concatenated directly with other path pieces further down
# ("/files"+baseDir, baseDir+dirName), so it must start and end with "/".
#baseDir = "/some_dir/numeric-notes/"
baseDir = "/some_dir/einsteinpy/"
# function that changes lists to strings
# FO: fix - don't strip source - indents matter
def fix_json(json):
    """Convert list-valued text fields of a parsed notebook to strings, in place.

    Notebooks that keep their cells under ``worksheets`` are handled one
    worksheet at a time; notebooks with a top-level ``cells`` list are
    handled directly. After processing, the key ``'fixed'`` is set to
    ``True`` so that a later call leaves the notebook untouched.

    NOTE(review): ``'fixed'`` ends up as an extra top-level key in whatever
    is uploaded afterwards; confirm that notebook readers accept it.

    Args:
        json: The parsed notebook (a dict). The parameter name shadows the
            ``json`` module inside this function only.

    Returns:
        The same dict object that was passed in, so the result can be used
        directly as the payload to serialize. It is returned unchanged when
        it already carries the ``'fixed'`` marker.
    """
    if 'fixed' in json:
        # Already converted on an earlier run: hand it back as is.
        return json
    if 'worksheets' in json:
        for worksheet in json['worksheets']:
            fix_json_cells(worksheet)
    elif 'cells' in json:
        fix_json_cells(json)
    # Marker checked at the top of this function on subsequent calls.
    json['fixed'] = True
    return json
def fix_json_cells(j):
    """Join list-valued text fields of every cell in ``j['cells']``, in place.

    For each cell:

    * a list-valued ``text`` is stripped line by line and concatenated;
    * otherwise a list-valued ``source`` is concatenated verbatim, because
      leading whitespace (indentation) in source lines is significant;
    * every output whose ``text`` is a list gets its lines stripped and
      joined with a newline.

    Fields that are already strings are left alone.

    NOTE(review): stripping ``text`` lines discards leading whitespace and
    line endings; that matches the previous behaviour and is kept as is.

    Args:
        j: A dict that may hold a ``cells`` list (a notebook or a
            worksheet). A missing or empty ``cells`` entry is a no-op.

    Returns:
        None. The cells are modified in place.
    """
    cells = j.get('cells')
    if not cells:
        return
    for cell in cells:
        if isinstance(cell.get('text'), list):
            cell['text'] = "".join(line.strip() for line in cell['text'])
        elif isinstance(cell.get('source'), list):
            # No strip here: indentation inside source lines must survive.
            cell['source'] = "".join(cell['source'])
        # `or ()` also covers a cell whose 'outputs' is present but None.
        for output in cell.get('outputs') or ():
            if isinstance(output.get('text'), list):
                output['text'] = "\n".join(
                    line.strip() for line in output['text']
                )
# Settings handed to the WebDAV client. Login and password are left blank
# here and have to be filled in before running.
# NOTE(review): 'webdav_override_methods' presumably makes the client's
# existence check use GET instead of its default method - confirm against the
# webdav3 documentation. Also confirm that a 'verify' key in this dict is
# actually honoured by the client.
webdav_options = dict(
    webdav_hostname="https://sciencedata/files/",
    webdav_login='',
    webdav_password='',
    verify=False,
    webdav_override_methods=dict(check='GET'),
)
webdav_client = Client(webdav_options)
# Get list of directories
# The code expects the paths in a listing to be prefixed with "/files", so
# the base directory is compared and stripped in that form.
baseDirFull = "/files"+baseDir
dirs = webdav_client.list(baseDir, get_info=True)
# Keep directories only, and drop the entry for baseDir itself.
realDirs = [
    entry for entry in dirs
    if entry['isdir'] and entry['path'] != baseDirFull
]
# Directory names relative to baseDir. Only a leading baseDirFull is removed,
# so a path that happens to contain the same text again further in stays intact.
dirNames = [
    entry['path'][len(baseDirFull):]
    if entry['path'].startswith(baseDirFull) else entry['path']
    for entry in realDirs
]
# Or just work with a single directory. Names are relative to baseDir,
# so '' selects baseDir itself.
#dirNames = ['']
# Get list of notebooks
fileNames = []
for dirName in dirNames:
    # dirName is relative to baseDir.
    dirNameFull = baseDir+dirName
    files = webdav_client.list(dirNameFull, get_info=True)
    realFiles = [entry for entry in files if entry['path'].endswith('.ipynb')]
    # Remove only the leading "/files" root. A blanket replace would also
    # eat "/files" inside the path, e.g. a subdirectory named "files".
    fileNames.extend(
        entry['path'][len('/files'):]
        if entry['path'].startswith('/files/') else entry['path']
        for entry in realFiles
    )
# Or just work with a single file
#fileNames = [baseDir+"index.ipynb"]
# Get notebooks, fix and upload
headers = {'Content-type': 'application/json'}
for file in fileNames:
    url = "https://sciencedata/files/"+file
    r = requests.get(url)
    # Stop on an HTTP error instead of parsing an error page and then
    # overwriting the notebook with the result.
    r.raise_for_status()
    # Response.json() decodes the body itself rather than relying on the
    # guessed text encoding behind r.text.
    fileJson = r.json()
    # fix_json modifies the dict in place. Upload that dict itself rather
    # than relying on fix_json's return value, so that "null" can never be
    # written over a notebook.
    fix_json(fileJson)
    fixedJson = fileJson
    rr = requests.put(url, data=json.dumps(fixedJson), headers=headers)
    # Surface a failed upload instead of silently moving on.
    rr.raise_for_status()