# notes-20160811-code
#
# p { color: red }
 

#!/usr/bin/python
# from http://www.reades.com/2012/11/29/mb-archives/
# From Jon Reades, blog entry 29 Nov. 2012 "Extracting files from Moodle MBZ Archives with Python"
#
# rename the .mbz file to end in .zip, then unzip
#
# Configure the Python script below with the appropriate parameters:
#   destination: path to where you want the Moodle files saved
#   source: path to where the unpacked MBZ file is stored
#   pattern: types of files you want to extract (the ones listed here are fairly comprehensive,
#            and if you need more then you probably know enough to adjust the regex)
#
# Run the script and check that you've picked up all the content you needed.
#
# Note that there are a few limitations to this script:
#
#    It doesn't preserve any hierarchy from the Moodle archive (so if there are folders and subfolders in the backup you will lose this)
#    It doesn't deal with files that have the same name - in this case it will blindly overwrite the first occurrence of a file with the same name using the second file of the same name
#
# 2016.08.11 Note from GQMJr:
# One quick way to work around non-unique filenames is to include an identifying field of the file entry (for example "itemid") in the final file name; the code below uses "contextid" as a subdirectory name instead.
#
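# Each entry in files.xml has the form:
#
# <file id="...">
#    <contenthash>d332f83ec4eabce53214fe036329b4f9c91b0b9a</contenthash>
#    <contextid>6652</contextid>
#    <component>mod_resource</component>
#    <filearea>content</filearea>
#    <itemid>0</itemid>
#    <filepath>/</filepath>
#    <filename>foo-filename.pdf</filename>
#    <userid>3</userid>
#    <filesize>111973</filesize>
#    <mimetype>application/pdf</mimetype>
#    <status>0</status>
#    <timecreated>1426863384</timecreated>
#    <timemodified>1426863389</timemodified>
#    <source>talk-guidelines.pdf</source>
#    <author>Firstname Lastname</author>
#    <license>allrightsreserved</license>
#    <sortorder>1</sortorder>
#    <repositorytype>$@NULL@$</repositorytype>
#    <repositoryid>$@NULL@$</repositoryid>
#    <reference>$@NULL@$</reference>
# </file>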

import xml.etree.ElementTree as etree
import fnmatch
import shutil
import os
import re


def locate(pattern, root=os.curdir):
    '''Yield the full path of every file at or below ``root`` whose name
    matches the shell-style wildcard ``pattern``.

    ``root`` is made absolute before walking, so every yielded path is
    absolute.  Results are produced lazily, in os.walk() order.'''
    top = os.path.abspath(root)
    for dirpath, _subdirs, names in os.walk(top):
        for name in names:
            if fnmatch.fnmatch(name, pattern):
                yield os.path.join(dirpath, name)

# Configuration -- adjust these three values before running.
#   destination: directory that receives the extracted files
#   source:      directory holding the unpacked MBZ archive (must contain files.xml)
#   pattern:     regex selecting which original filenames get extracted
#destination = '/Users/foobar/Desktop/Moodle Copy/'
destination = './Moodle_copy/'
#source      = '/Users/foobar/Desktop/Moodle Backup/'
source      = './Moodle_backup/'
# GQMjr added gz|tar|jpg|log|tgz
# Raw string so that \s and \. reach the regex engine untouched.
pattern     = re.compile(r'^\s*(.+\.(?:pdf|png|zip|gz|tar|jpg|log|tgz|rtf|sav|mp3|mht|por|xlsx?|docx?|pptx?))\s*$', flags=re.IGNORECASE)

# os.path.join works whether or not `source` ends with a path separator.
tree = etree.parse(os.path.join(source, 'files.xml'))
root = tree.getroot()

# Running number used to disambiguate a file whose target name is already taken.
counter = 1

# A single pre-formatted argument prints identically under Python 2 and Python 3.
print("Root:  %s" % (root,))

for rsrc in root:
    contextid = rsrc.findtext('contextid')    #GQMjr
    fhash = rsrc.findtext('contenthash')
    fname = rsrc.findtext('filename')

    # findtext() returns None when the child element is absent; report and
    # skip such entries instead of failing with an AttributeError.
    if contextid is None or fhash is None or fname is None:
        print("Skipping entry without contextid/contenthash/filename")
        continue

    if not pattern.search(fname):
        if fname == '.':   # a . directory - nothing to do #GQMjr
            continue
        print("No Match: ' %s '" % (fname,))
        continue

    # Files are grouped into one sub-directory per contextid so that equal
    # names coming from different contexts do not overwrite each other. #GQMjr
    target_dir = os.path.join(destination, contextid)

    # The stored copy is looked up by searching `source` for files whose
    # name is the content hash.
    for stored in locate(fhash, source):
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        target = os.path.join(target_dir, fname)
        if os.path.isfile(target):
            # Name already taken in this context: insert the counter before
            # the extension ("name-3.pdf") so the file type stays recognisable.
            stem, ext = os.path.splitext(fname)
            target = os.path.join(target_dir, '%s-%d%s' % (stem, counter, ext))
            counter += 1

        shutil.copyfile(stored, target)