get_textbox_submissions_as_docx.py explained

Note that the code below is simplified from the actual code. The full source code can be seen at https://github.com/gqmaguirejr/Canvas-tools.


#!/usr/bin/python3
# -*- coding: utf-8 -*-
# -*- mode: python; python-indent-offset: 4 -*-
#
# ./get_textbox_submissions_as_docx.py course_id assignment_id
# Purpose:
#   create a DOCX file for each textual submission
#
# with the option "-v" or "--verbose" you get lots of output - showing in detail the operations of the program
# with the option '-C' or '--containers' use HTTP rather than HTTPS for access to Canvas
#
# Can also be called with an alternative configuration file:
#  --config config-test.json
#
# Example - to generate docx file locally
# ./get_textbox_submissions_as_docx.py -C 7 44
#
# Example - to generate docx file locally and upload it as a submission
# ./get_textbox_submissions_as_docx.py --submit  -C 7 44
#
# to get the DOCX library do:
#    pip3 install python-docx
# 
# G. Q. Maguire Jr.
#
# 2020.03.01
#

import requests, time
import pprint
import optparse
import sys
import json

# use the Document library to be able to create the .docx file
from docx import Document
#from docx.shared import Inches
from docx.shared import Cm
from docx.enum.text import WD_COLOR_INDEX
from docx.enum.text import WD_BREAK
from docx.shared import Pt

import os

from bs4 import BeautifulSoup

This code gets the URL to use when accessing Canvas and the token to authenticate


# These names are assigned by initialize() and read by the functions that call the Canvas API.
global baseUrl	# the base URL used for access to Canvas
global header	# the header for all HTTP requests
global payload	# place to store additional payload when needed for options to HTTP requests

# Based upon the options to the program, initialize the variables used to access Canvas via HTTP requests
def initialize(options):
    """Set the module-level Canvas access variables from a JSON configuration file.

    The configuration file must contain configuration["canvas"]["access_token"]
    and configuration["canvas"]["host"].

    Parameters:
        options: parsed command line options; uses options.config_filename
                 (falls back to 'config.json' when empty) and
                 options.containers (use HTTP rather than HTTPS when true).

    Side effects:
        Sets the globals baseUrl, header, and payload.
        Prints a message and calls sys.exit() if the file cannot be read,
        is not valid JSON, or lacks the expected keys.
    """
    global baseUrl, header, payload

    # styled based upon https://martin-thoma.com/configuration-files-in-python/
    config_file = options.config_filename or 'config.json'

    # Only the errors that a missing or malformed configuration file can cause are
    # handled here; anything else (for example KeyboardInterrupt) is left to propagate.
    try:
        with open(config_file) as json_data_file:
            configuration = json.load(json_data_file)
        canvas_settings = configuration["canvas"]
        access_token = canvas_settings["access_token"]
        host = canvas_settings["host"]
    except (OSError, ValueError, KeyError, TypeError):
        print("Unable to open configuration file named {}".format(config_file))
        print("Please create a suitable configuration file, the default name is config.json")
        sys.exit()

    if options.containers:
        baseUrl = "http://" + host + "/api/v1"
        print("using HTTP for the container environment")
    else:
        baseUrl = "https://" + host + "/api/v1"

    header = {'Authorization': 'Bearer ' + access_token}
    payload = {}

This code makes sure that the assignment allows for an upload and a docx file to be submitted


def enable_docx_submission_for_assignment(course_id, assignment_id, assignment):
    """Make sure the assignment accepts an uploaded DOCX file.

    Adds 'online_upload' to the assignment's submission types when it is missing and,
    if the assignment restricts the allowed extensions, adds 'docx' to that list.
    The assignment is edited via PUT /api/v1/courses/:course_id/assignments/:id
    only when one of these changes is needed.

    Parameters:
        course_id: Canvas course identifier
        assignment_id: Canvas assignment identifier
        assignment: the assignment's dict as returned by the Canvas API; after a
                    successful edit its 'submission_types' / 'allowed_extensions'
                    entries are updated so a later call does not repeat the PUT

    Returns:
        False if the assignment has no submission types or the PUT fails,
        True if no change was needed,
        otherwise the JSON response of the successful PUT.
    """
    current_submission_types = assignment.get('submission_types', [])
    if not current_submission_types:
        print("Assignment does not permit submissions!")
        return False

    parameters = {}

    # add 'online_upload' as a submission type if it is not there
    updated_submission_types = None
    if 'online_upload' not in current_submission_types:
        updated_submission_types = list(current_submission_types) + ['online_upload']
        parameters['assignment[submission_types][]'] = updated_submission_types

    # if there are specified allowed extensions, make sure they include docx, else add it to the list of extensions
    # (an empty or missing list is left alone, nothing is added to it)
    currently_allowed_extensions = assignment.get('allowed_extensions', False)
    updated_extensions = None
    if currently_allowed_extensions and 'docx' not in currently_allowed_extensions:
        updated_extensions = list(currently_allowed_extensions) + ['docx']
        parameters['assignment[allowed_extensions][]'] = updated_extensions

    if not parameters:
        return True             # nothing to edit

    # Use the Canvas API to edit the assignment
    #PUT /api/v1/courses/:course_id/assignments/:id
    url = "{0}/courses/{1}/assignments/{2}".format(baseUrl, course_id, assignment_id)
    r = requests.put(url, params=parameters, headers=header)
    if r.status_code >= 300:
        return False            # the edit failed, so do not report success to the caller

    # Record the change in the caller's dict only once Canvas has accepted it
    if updated_submission_types is not None:
        assignment['submission_types'] = updated_submission_types
    if updated_extensions is not None:
        assignment['allowed_extensions'] = updated_extensions

    return r.json()

This code does the multistage process to upload a file


def submit_file_for_assignment(course_id, assignment_id, user_id, filename):
    """Upload a file and submit it for an assignment on behalf of a user.

    This takes three requests:
      1. tell Canvas about the file to get an upload URL and upload parameters,
      2. send the file contents to that upload URL,
      3. create a submission of type 'online_upload' that refers to the uploaded file.

    Parameters:
        course_id: Canvas course identifier
        assignment_id: Canvas assignment identifier
        user_id: Canvas user to submit as (passed as 'as_user_id')
        filename: name of the local file to upload

    Returns:
        The JSON response of the submission request, or False if one of the
        requests returns a status code of 300 or above.

    Raises:
        requests.HTTPError (via raise_for_status) if the final submission request fails.
    """
    # Step 1: Use the Canvas API to submit a file for an assignment
    #POST /api/v1/courses/:course_id/assignments/:assignment_id/submissions/:user_id/files
    url = "{0}/courses/{1}/assignments/{2}/submissions/{3}/files".format(baseUrl, course_id, assignment_id, user_id)
    extra_parameters = {'as_user_id': user_id}
    # named file_description so that it does not shadow the module-level payload
    file_description = {'name': filename,
                        'size': os.path.getsize(filename),
                        'content-type': 'binary/octet-stream',
                        'on_duplicate': 'overwrite'
    }
    r = requests.post(url, params=extra_parameters, headers=header, data=file_description)
    if r.status_code >= 300:
        return False

    page_response = r.json()
    upload_url = page_response['upload_url']
    upload_params = page_response['upload_params']

    # Step 2: send the file; the with statement closes the file even if the request raises
    with open(filename, 'rb') as file_to_upload:
        r = requests.post(upload_url, params=upload_params, files={"file": file_to_upload})
    if r.status_code >= 300:
        return False

    file_id = r.json()['id']

    # Step 3: make the submission that refers to the uploaded file
    # POST /api/v1/courses/:course_id/assignments/:assignment_id/submissions
    url = "{0}/courses/{1}/assignments/{2}/submissions".format(baseUrl, course_id, assignment_id)

    extra_parameters = {'as_user_id': user_id,
                        'comment[text_comment]': "uploaded DOCX file",
                        'submission[submission_type]': 'online_upload',
                        'submission[file_ids][]': file_id,
                        'submission[user_id]': user_id
                        }

    r = requests.post(url, params=extra_parameters, headers=header)
    r.raise_for_status()
    if r.status_code < 300:
        return r.json()
    return False

This code does some modification of the HTML and collects simple paragraphs


def get_paragraphs_from_HTML(b):
    """Return the <p> elements found in an HTML string.

    Each '<br />' is first turned into a paragraph boundary, so that a line break
    inside a paragraph yields separate paragraphs.

    Parameters:
        b: the HTML text (a str)

    Returns:
        A list of the BeautifulSoup elements for the paragraphs, in document order.
    """
    # clean up the HTML
    # str.replace() returns a new string (strings are immutable), so keep the result
    cleaned_html = b.replace('<br />', '</p><p>')

    xml = BeautifulSoup(cleaned_html, "lxml")
    return list(xml.find_all('p'))

This code gets the information about a given user_id


def get_user(user_id):
    """Fetch the information Canvas has about a user.

    Returns the decoded JSON response, or an empty list if the request
    does not return the OK status code.
    """
    #GET /api/v1/users/:user_id
    response = requests.get("{0}/users/{1}".format(baseUrl, user_id), headers=header)
    if response.status_code != requests.codes.ok:
        return []
    return response.json()

This code gets the enrollments for a course


def users_in_course(course_id):
    """Return the list of enrollments for a course.

    Uses GET /api/v1/courses/:course_id/enrollments (100 entries per page) and
    follows the 'next' links of a paginated response. An empty list is returned
    if the first request does not return the OK status code.
    """
    enrollments = []
    first_page_url = "{0}/courses/{1}/enrollments".format(baseUrl, course_id)
    response = requests.get(first_page_url, params={'per_page': '100'}, headers=header)
    if response.status_code != requests.codes.ok:
        return enrollments

    enrollments.extend(response.json())

    # the following is needed when the response has been paginated
    # i.e., when the response is split into pieces - each returning only some of the list of enrollments
    # see "Handling Pagination" - Discussion created by tyler.clair@usu.edu on Apr 27, 2015, https://community.canvaslms.com/thread/1500
    while response.links.get('next', False):
        response = requests.get(response.links['next']['url'], headers=header)
        next_page = response.json()
        if response.status_code == requests.codes.ok:
            enrollments.extend(next_page)

    return enrollments

This code gets the assignments for a course


def list_assignments(course_id):
    """Return the list of assignments for a course.

    Uses GET /api/v1/courses/:course_id/assignments and follows the 'next' links
    of a paginated response.

    Parameters:
        course_id: Canvas course identifier

    Returns:
        A list with the assignments collected from every page that returned the
        OK status code; an empty list if the first request fails.
    """
    assignments_found_thus_far = []
    # Use the Canvas API to get the list of assignments for the course
    #GET /api/v1/courses/:course_id/assignments
    url = "{0}/courses/{1}/assignments".format(baseUrl, course_id)
    r = requests.get(url, headers=header)
    if r.status_code != requests.codes.ok:
        return assignments_found_thus_far

    assignments_found_thus_far.extend(r.json())

    # the following is needed when the response has been paginated
    # i.e., when the response is split into pieces - each returning only some of the list of assignments
    # see "Handling Pagination" - Discussion created by tyler.clair@usu.edu on Apr 27, 2015, https://community.canvaslms.com/thread/1500
    # Follow only the 'next' link (as users_in_course() does): a response need not carry
    # 'current' or 'last' links, and indexing a missing one raises KeyError.
    while r.links.get('next', False):
        r = requests.get(r.links['next']['url'], headers=header)
        if Verbose_Flag:
            print("result of getting assignments for a paginated response: {}".format(r.text))
        if r.status_code != requests.codes.ok:
            break               # do not try to decode or follow links from a failed page
        assignments_found_thus_far.extend(r.json())

    return assignments_found_thus_far

This code gets the submissions for a given user for an assignment for a course


def submission_for_assignment_by_user(course_id, assignment_id, user_id):
    """Return a single user's submission for an assignment in a course as a dict.

    Uses GET /api/v1/courses/:course_id/assignments/:assignment_id/submissions/:user_id
    An empty dict is returned if the request does not return the OK status code.
    """
    endpoint = "{0}/courses/{1}/assignments/{2}/submissions/{3}".format(baseUrl, course_id, assignment_id, user_id)
    response = requests.get(endpoint, headers=header)
    return response.json() if response.status_code == requests.codes.ok else dict()

The main program


def main():
    """Create a DOCX file for each textual submission of an assignment.

    Command line: [options] course_id assignment_id
    With --submit the generated file is also uploaded as a submission for the student.
    """
    global Verbose_Flag

    # define the command line options
    parser = optparse.OptionParser()

    parser.add_option('-v', '--verbose',
                      dest="verbose",
                      default=False,
                      action="store_true",
                      help="Print lots of output to stdout"
    )

    parser.add_option('-C', '--containers',
                      dest="containers",
                      default=False,
                      action="store_true",
                      help="for the container enviroment in the virtual machine"
    )

    parser.add_option('-t', '--testing',
                      dest="testing",
                      default=False,
                      action="store_true",
                      help="execute test code"
    )

    parser.add_option('-s', '--submit',
                      dest="submit",
                      default=False,
                      action="store_true",
                      help="submit resulting DOCX file as the student"
    )

    parser.add_option("--config", dest="config_filename",
                      help="read configuration from FILE", metavar="FILE")

    # remainder holds the positional arguments left after the options are removed
    options, remainder = parser.parse_args()

    Verbose_Flag=options.verbose
    if Verbose_Flag:
        print("ARGV      : {}".format(sys.argv[1:]))
        print("VERBOSE   : {}".format(options.verbose))
        print("REMAINING : {}".format(remainder))
        print("Configuration file : {}".format(options.config_filename))

    # read the configuration file and set up the variables used to access Canvas
    initialize(options)
    if (len(remainder) < 2):
        print("Insuffient arguments - must provide course_id assignment_id")
        sys.exit()
    else:
        course_id=remainder[0]
        # converted to an int so that it can be compared with the 'id' of each assignment
        assignment_id=int(remainder[1])

Get the information about the assignment and the students in the course


    all_assignments=list_assignments(course_id)
    # check that the assignment is in this course
    assignment=False            # stays False if no assignment has the requested id
    for a in all_assignments:
        if a['id'] == assignment_id:
            assignment=a
            break

    if not assignment:
        print("assignment_id={} not found in this course".format(assignment_id))
        print("The assignments are: {}".format(all_assignments))
        return

    enrollments=users_in_course(course_id)
    # compute a set of user_id for the students in the course
    # (a set, so that each user_id appears only once)
    student_ids=set()
    for e in enrollments:
        if e['type'] == 'StudentEnrollment':
            student_ids.add(e['user_id'])

For every student check if they have a submission and if it has text in it


    for s in student_ids:
        # get this student's submission (an empty dict if the request did not succeed)
        submission_info=submission_for_assignment_by_user(course_id, assignment_id, s)

An example of this submission_info is:

{'id': 4016,
 'body': 'This is a some text for the submission. It is going to contain two paragraphs each of which are multiple lines of text long. Testing 1, 2, and 3.\r\nThis is a second paragraph of text for the submission. It consists of just several sentences. In this case the paragraph goes one and on and on. For no real reason.',
 'url': None,
 'grade': None,
 'score': None,
 'submitted_at': '2021-03-01T11:20:21Z',
 'assignment_id': 44,
 'user_id': 6,
 'submission_type': 'online_text_entry',
 'workflow_state': 'submitted',
 'grade_matches_current_submission': True,
 'graded_at': None,
 'grader_id': None,
 'attempt': 4,
 'cached_due_date': None,
 'excused': None,
 'late_policy_status': None,
 'points_deducted': None,
 'grading_period_id': None,
 'extra_attempts': None,
 'late': False,
 'missing': False,
 'seconds_late': 0,
 'entered_grade': None,
 'entered_score': None,
 'preview_url': 'http://canvas.docker/courses/7/assignments/44/submissions/6?preview=1&version=4',
 'anonymous_id': 'FYUmc'}

The code then looks at the 'body' to process it into a document:


        # when a submission has been answered with a text ebtry, the text is placed in the body isn HTML format.
        if not submission_info['body']:
            continue
        if submission_info['body']:
            student=get_user(s)
            print("{0}: submission_info={1}".format(student['sortable_name'], submission_info))

The following does all of the magic to create the contents of the docx file


            document = Document('blank.docx') # start with a blank A4 page document
            # set the font and the spacing before each paragraph for the 'Normal' style
            style = document.styles['Normal']
            font = style.font
            font.name = 'Garamond'
            font.size = Pt(12)

            paragraph_format = document.styles['Normal'].paragraph_format
            paragraph_format.space_before = Pt(24)

            # first paragraph: who made the submission, for what, and when
            about_submission_text="submission by '{0}' (canvas user_id={1}) in course={2} for assignment={3}, submitted_at={4}".format(student['name'], s, course_id, assignment_id, submission_info['submitted_at'])
            p = document.add_paragraph(about_submission_text)
            submission_body=submission_info['body']
            
            # a highlighted heading followed by the submission as raw HTML
            p=document.add_paragraph('')
            font = p.add_run('Submission in HTML:').font
            font.highlight_color = WD_COLOR_INDEX.YELLOW
            p = document.add_paragraph(submission_body)

            # a highlighted heading followed by the text of each paragraph of the submission
            p=document.add_paragraph('')
            font = p.add_run('Submission:').font
            font.highlight_color = WD_COLOR_INDEX.YELLOW

            paragraphs=get_paragraphs_from_HTML(submission_body)
            for bp in paragraphs:
                # bp is a BeautifulSoup element while add_paragraph() takes text,
                # so pass the element's text (including the text of any nested tags)
                p = document.add_paragraph(bp.get_text())

            document_name="{0}-{1}-{2}-{3}.docx".format(student['name'], course_id, assignment_id, submission_info['submitted_at'])
            document.save(document_name)

View of such a document:

View of the docx document.  

If the submit option is not specified - do not do any more processing for this student. Otherwise, make sure we can upload a docx file.

       
            if not options.submit: # if the option to submit is not given, then just generate the DOCX files (locally)
                continue

            # make sure the assignment accepts an uploaded docx file; a false status means this student is skipped
            status=enable_docx_submission_for_assignment(course_id, assignment_id, assignment)
            if not status:
                print("unable to enable for docx submission")
                continue

Note that if the testing flag is set - do not actually submit the file.


            print("about to submit a file")
            # with the testing option the upload is skipped, but the submission is still fetched again below
            if not options.testing:
                submit_file_for_assignment(course_id, assignment_id, s, document_name)
            submission_info2=submission_for_assignment_by_user(course_id, assignment_id, s)

Finally, output the possibly new state of the submission.


            # show the submission as fetched after the (possible) upload
            print("submission_info after submission={}".format(submission_info2))

The state of the submission.

            
{'id': 4016,
 'body': None,
 'url': None,
 'grade': None,
 'score': None,
 'submitted_at': '2021-03-01T12:25:27Z',
 'assignment_id': 44,
 'user_id': 6,
 'submission_type': 'online_upload',
 'workflow_state': 'submitted',
 'grade_matches_current_submission': True,
 'graded_at': None,
 'grader_id': None,
 'attempt': 5,
 'cached_due_date': None,
 'excused': None,
 'late_policy_status': None,
 'points_deducted': None,
 'grading_period_id': None,
 'extra_attempts': None,
 'late': False,
 'missing': False,
 'seconds_late': 0,
 'entered_grade': None,
 'entered_score': None,
 'preview_url': 'http://canvas.docker/courses/7/assignments/44/submissions/6?preview=1&version=5',
 'attachments': [{'id': 85,
 'uuid': 'viuGfsbGO0zB68eE3wDhhPsLAWzpHcTL39ULwrLB',
 'folder_id': 30,
 'display_name': 'Ellen FakeStudent-7-44-2021-03-01T11:20:21Z.docx',
 'filename': '1614601526_555__Ellen_FakeStudent-7-44-2021-03-01T11:20:21Z.docx',
 'workflow_state': 'processed',
 'upload_status': 'success',
 'content-type': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
 'url': 'http://canvas.docker/files/85/download?download_frd=1&verifier=viuGfsbGO0zB68eE3wDhhPsLAWzpHcTL39ULwrLB',
 'size': 10257,
 'created_at': '2021-03-01T12:25:26Z',
 'updated_at': '2021-03-01T12:25:27Z',
 'unlock_at': None,
 'locked': False,
 'hidden': False,
 'lock_at': None,
 'hidden_for_user': False,
 'thumbnail_url': None,
 'modified_at': '2021-03-01T12:25:26Z',
 'mime_class': 'doc',
 'media_entry_id': None,
 'locked_for_user': False,
 'preview_url': None}],
 'anonymous_id': 'FYUmc'}

Invoke the function main().

          

# Run main() only when this file is executed as a program (not when it is imported)
if __name__ == "__main__":
    main()