Fix broken notebooks

The code below fixes notebooks in subdirectories of a directory "baseDir" if they have the problem described here:

https://github.com/jupyter/notebook/issues/5292

The essence of the code is taken from one of the comments on that issue.

In [ ]:
import json
import requests
from webdav3.client import Client
In [1]:
# Server-side directory whose subdirectories are scanned for notebooks.
# It is concatenated as-is ("/files"+baseDir and baseDir+dirName), so it is
# expected to start and end with "/".
#baseDir = "/some_dir/numeric-notes/"
baseDir = "/some_dir/einsteinpy/"
In [2]:
# function that changes lists to strings
# FO: fix - don't strip source - indents matter
def fix_json(json):
    """Turn list-valued cell fields of a parsed notebook into strings, in place.

    Two layouts are handled: a dict with a 'worksheets' list (every worksheet
    is processed) or a dict with a top-level 'cells' list. After processing,
    the key 'fixed' is set to True; a dict that already carries a 'fixed' key
    is left untouched.

    Args:
        json: The notebook as a dict (e.g. the result of json.loads). Note
            that this parameter name hides the json module inside this
            function.

    Returns:
        The same dict object that was passed in, so that the result of the
        call can be serialized directly.
    """
    if 'fixed' in json:
        # Already processed earlier; hand it back unchanged.
        return json
    if 'worksheets' in json:
        for worksheet in json['worksheets']:
            fix_json_cells(worksheet)
    elif 'cells' in json:
        fix_json_cells(json)
    json['fixed'] = True
    return json

def fix_json_cells(j):
    """Join list-valued 'text'/'source' fields of the cells in ``j``, in place.

    For every cell:
      * a list-valued 'text' is joined without separator, each item stripped;
      * otherwise a list-valued 'source' is joined without separator and
        without stripping, so indentation is kept;
      * in each entry of 'outputs', a list-valued 'text' is joined with a
        newline, each item stripped.

    Args:
        j: Dict that may hold a 'cells' list (a notebook or a worksheet).
            A missing or empty 'cells' entry is a no-op.

    Returns:
        None. The dict is modified in place.
    """
    cells = j.get('cells')
    if not cells:
        return
    for cell in cells:
        if 'text' in cell and isinstance(cell['text'], list):
            cell['text'] = "".join(line.strip() for line in cell['text'])
        elif 'source' in cell and isinstance(cell['source'], list):
            # Source lines are joined verbatim: leading whitespace matters.
            cell['source'] = "".join(cell['source'])
        for output in cell.get('outputs') or ():
            if 'text' in output and isinstance(output['text'], list):
                # NOTE: strip() also removes leading whitespace of each line.
                output['text'] = "\n".join(
                    line.strip() for line in output['text']
                )
In [3]:
# Connection settings for the WebDAV client that is used below to list
# directories. Login and password are left empty here.
webdav_options = {
 'webdav_hostname': "https://sciencedata/files/",
 'webdav_login': '',
 'webdav_password': '',
 # NOTE(review): presumably meant to switch off TLS certificate
 # verification - confirm that the webdav3 client honours this key.
 'verify': False,
 # NOTE(review): presumably makes the client's existence check use GET -
 # confirm against the webdav3 documentation.
 'webdav_override_methods': {
  'check': 'GET'
  }
}
webdav_client = Client(webdav_options)
In [34]:
# Get list of directories
# Listed paths are compared against baseDir in its "/files"-prefixed form.
baseDirFull = "/files" + baseDir
dirs = webdav_client.list(baseDir, get_info=True)
# Keep directories only, leaving out the entry for baseDir itself.
realDirs = [
    entry for entry in dirs
    if entry['isdir'] and entry['path'] != baseDirFull
]
# Directory names relative to baseDir.
dirNames = [entry['path'].replace(baseDirFull, '') for entry in realDirs]
In [ ]:
# Or just work with a single directory
#dirNames = [""]  # entries are relative to baseDir: "" is baseDir itself, "subdir/" a subdirectory
In [35]:
# Get list of notebooks
# Collects the path of every *.ipynb entry listed directly in each directory
# of dirNames, with the leading "/files" removed.
filesPrefix = '/files'
fileNames = []
for dirName in dirNames:
    dirNameFull = baseDir + dirName
    files = webdav_client.list(dirNameFull, get_info=True)
    realFiles = [entry for entry in files if entry['path'].endswith('.ipynb')]
    for entry in realFiles:
        path = entry['path']
        # Remove the prefix only. str.replace would also delete any later
        # "/files" inside the path (e.g. a subdirectory called "files").
        if path.startswith(filesPrefix):
            path = path[len(filesPrefix):]
        fileNames.append(path)
In [4]:
# Or just work with a single file
#fileNames = [baseDir+"index.ipynb"]
In [6]:
# Get notebooks, fix and upload
# Each notebook is downloaded, fixed in memory and written back to the same
# URL, i.e. the file on the server is overwritten.
headers = {'Content-type': 'application/json'}
for file in fileNames:
    url = "https://sciencedata/files/"+file
    r = requests.get(url)
    # Abort on a failed download instead of parsing an error page.
    r.raise_for_status()
    fileJson = json.loads(r.text)
    # fix_json works in place, so fileJson itself is what gets uploaded; the
    # return value of the call is deliberately not relied on.
    fix_json(fileJson)
    rr = requests.put(url, data=json.dumps(fileJson), headers=headers)
    # Surface a rejected upload rather than silently moving on.
    rr.raise_for_status()
In [ ]: