get_textbox_submissions_as_docx.py explained
Note that the code below is simplified from the actual code. The full code can be found at https://github.com/gqmaguirejr/Canvas-tools
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# -*- mode: python; python-indent-offset: 4 -*-
#
# ./get_textbox_submissions_as_docx.py course_id assignment_id
# Purpose:
# create a DOCX file for each textual submission
#
# with the option "-v" or "--verbose" you get lots of output - showing in detail the operations of the program
# with the option '-C' or '--containers' use HTTP rather than HTTPS for access to Canvas
#
# Can also be called with an alternative configuration file:
# --config config-test.json
#
# Example - to generate docx file locally
# ./get_textbox_submissions_as_docx.py -C 7 44
#
# Example - to generate docx file locally and upload it as a submission
# ./get_textbox_submissions_as_docx.py --submit -C 7 44
#
# to get the DOCX library do:
# pip3 install python-docx
#
# G. Q. Maguire Jr.
#
# 2020.03.01
#
import requests, time
import pprint
import optparse
import sys
import json
# use the Document class of the python-docx library to be able to create the .docx file
from docx import Document
#from docx.shared import Inches
from docx.shared import Cm
from docx.enum.text import WD_COLOR_INDEX
from docx.enum.text import WD_BREAK
from docx.shared import Pt
import os
from bs4 import BeautifulSoup
This code gets the URL to use when accessing Canvas and the token to authenticate
# Names of the module-level state shared by the Canvas helper functions below.
# The values are assigned by initialize().
# NOTE(review): a `global` statement at module level has no effect in Python;
# these three lines only document the shared names.
global baseUrl # the base URL used for access to Canvas
global header # the header sent with all HTTP requests
global payload # place to store additional payload when needed for options to HTTP requests
# Based upon the options to the program, initialize the variables used to access Canvas via HTTP requests
def initialize(options):
    """Load the Canvas host and access token from a JSON configuration file.

    Sets the module-level variables ``baseUrl``, ``header`` and ``payload``.

    Parameters:
        options: the parsed command line options. ``options.config_filename``
            names the configuration file ('config.json' is used when it is
            empty or None). When ``options.containers`` is true, HTTP rather
            than HTTPS is used in the base URL.

    The configuration file must contain::

        {"canvas": {"access_token": "...", "host": "..."}}

    Exits the program with status 1 when the file cannot be read or does not
    have this structure.
    """
    global baseUrl, header, payload

    # styled based upon https://martin-thoma.com/configuration-files-in-python/
    config_file = options.config_filename or 'config.json'

    # Only the errors that reading and interpreting the configuration can
    # produce are caught; anything else (e.g. KeyboardInterrupt) propagates
    # instead of being reported as a configuration problem.
    try:
        with open(config_file) as json_data_file:
            configuration = json.load(json_data_file)
        canvas_settings = configuration["canvas"]
        access_token = canvas_settings["access_token"]
        host = canvas_settings["host"]
        new_header = {'Authorization': 'Bearer ' + access_token}
        scheme = "http://" if options.containers else "https://"
        new_base_url = scheme + host + "/api/v1"
    except OSError:
        print("Unable to open configuration file named {}".format(config_file))
        print("Please create a suitable configuration file, the default name is config.json")
        sys.exit(1)
    except (ValueError, KeyError, TypeError) as err:
        # json.JSONDecodeError is a ValueError; KeyError/TypeError mean that
        # the expected "canvas" entries are missing or of the wrong type.
        print("Configuration file named {0} is not usable: {1!r}".format(config_file, err))
        print("Please create a suitable configuration file, the default name is config.json")
        sys.exit(1)

    if options.containers:
        print("using HTTP for the container environment")

    baseUrl = new_base_url
    header = new_header
    payload = {}
This code makes sure that the assignment allows for an upload and a docx file to be submitted
def enable_docx_submission_for_assignment(course_id, assignment_id, assignment):
    """Make sure that the assignment accepts an uploaded DOCX file.

    Parameters:
        course_id: the Canvas course containing the assignment.
        assignment_id: the Canvas id of the assignment.
        assignment: the assignment as a dict (as returned by the Canvas API);
            its 'submission_types' and 'allowed_extensions' entries are read.

    Returns:
        False if the assignment has no submission types or if the request to
        edit the assignment failed; True if nothing needed to be changed;
        otherwise the JSON response (the edited assignment) from Canvas.

    On a successful edit the lists inside ``assignment`` are updated in place,
    so that a later call with the same dict does not repeat the request.
    """
    # parameters for the edit request; stays empty when nothing has to change
    parameters = {}

    current_submission_types = assignment.get('submission_types', [])
    if not current_submission_types:
        print("Assignment does not permit submissions!")
        return False

    # add 'online_upload' as a submission type if it is not there
    add_online_upload = 'online_upload' not in current_submission_types
    if add_online_upload:
        parameters['assignment[submission_types][]'] = list(current_submission_types) + ['online_upload']

    # if there are specified allowed extensions, make sure they include docx;
    # an empty or missing list is left alone
    currently_allowed_extensions = assignment.get('allowed_extensions', False)
    add_docx = bool(currently_allowed_extensions) and 'docx' not in currently_allowed_extensions
    if add_docx:
        parameters['assignment[allowed_extensions][]'] = list(currently_allowed_extensions) + ['docx']

    if not parameters:
        return True

    # Use the Canvas API to edit the assignment
    # PUT /api/v1/courses/:course_id/assignments/:id
    url = "{0}/courses/{1}/assignments/{2}".format(baseUrl, course_id, assignment_id)
    r = requests.put(url, params=parameters, headers=header)
    if r.status_code >= 300:
        # the edit was not made, so the assignment still does not accept docx
        return False

    # only now record the change in the caller's copy of the assignment
    if add_online_upload:
        current_submission_types.append('online_upload')
    if add_docx:
        currently_allowed_extensions.append('docx')
    return r.json()
This code does the multistage process to upload a file
def submit_file_for_assignment(course_id, assignment_id, user_id, filename):
    """Upload a file and submit it for an assignment on behalf of a user.

    The upload is done in three steps: tell Canvas about the file, send the
    file contents to the upload URL that Canvas returns, and then create a
    submission that refers to the uploaded file.

    Parameters:
        course_id: the Canvas course containing the assignment.
        assignment_id: the Canvas id of the assignment.
        user_id: the Canvas user the submission is made for (and as).
        filename: path of the local file to upload.

    Returns:
        The JSON response for the new submission, or False when one of the
        steps returned a status code of 300 or more.

    Raises:
        requests.HTTPError: if the final (submission) request returns a 4xx
            or 5xx status.
    """
    # Step 1: tell Canvas about the file to be uploaded
    # POST /api/v1/courses/:course_id/assignments/:assignment_id/submissions/:user_id/files
    url = "{0}/courses/{1}/assignments/{2}/submissions/{3}/files".format(baseUrl, course_id, assignment_id, user_id)
    extra_parameters = {'as_user_id': user_id}
    # NOTE(review): the Canvas file upload documentation names this parameter
    # 'content_type' - confirm whether 'content-type' is honoured.
    file_info = {'name': filename,
                 'size': os.path.getsize(filename),
                 'content-type': 'binary/octet-stream',
                 'on_duplicate': 'overwrite'
                 }
    r = requests.post(url, params=extra_parameters, headers=header, data=file_info)
    if r.status_code >= 300:
        return False
    page_response = r.json()
    upload_url = page_response['upload_url']
    upload_params = page_response['upload_params']

    # Step 2: send the contents of the file; the file is closed again as soon
    # as the request has completed
    # NOTE(review): the upload parameters are sent in the query string here;
    # confirm against the Canvas documentation whether they should be sent as
    # form fields instead.
    with open(filename, 'rb') as file_to_upload:
        r = requests.post(upload_url, params=upload_params, files={"file": file_to_upload})
    if r.status_code >= 300:
        return False
    file_id = r.json()['id']

    # Step 3: make a submission that refers to the uploaded file
    # POST /api/v1/courses/:course_id/assignments/:assignment_id/submissions
    url = "{0}/courses/{1}/assignments/{2}/submissions".format(baseUrl, course_id, assignment_id)
    extra_parameters = {'as_user_id': user_id,
                        'comment[text_comment]': "uploaded DOCX file",
                        'submission[submission_type]': 'online_upload',
                        'submission[file_ids][]': file_id,
                        'submission[user_id]': user_id
                        }
    r = requests.post(url, params=extra_parameters, headers=header)
    r.raise_for_status()
    if r.status_code < 300:
        return r.json()
    return False
This code does some modification of the HTML and collects simple paragraphs
def get_paragraphs_from_HTML(b):
    """Return the paragraphs (the <p> elements) found in a string of HTML.

    Line breaks (<br />, <br/> and <br>) are first turned into paragraph
    boundaries, so that each line becomes a paragraph of its own.

    Parameters:
        b: the HTML as a string.

    Returns:
        A list of the BeautifulSoup elements for the paragraphs, in document
        order (use their get_text() method to obtain the plain text).
    """
    # clean up the HTML
    # str.replace() returns a new string, so the result has to be assigned
    for line_break in ('<br />', '<br/>', '<br>'):
        b = b.replace(line_break, '</p><p>')

    xml = BeautifulSoup(b, "lxml")
    return list(xml.find_all('p'))
This code gets the information about a given user_id
def get_user(user_id):
    """Fetch the Canvas information about one user.

    Parameters:
        user_id: the Canvas id of the user.

    Returns:
        The JSON response from Canvas, or an empty list when the request did
        not succeed.
    """
    # GET /api/v1/users/:user_id
    response = requests.get("{0}/users/{1}".format(baseUrl, user_id), headers=header)
    if response.status_code != requests.codes.ok:
        return []
    return response.json()
This code gets the enrollments for a course
def users_in_course(course_id):
    """Return all of the enrollments in a course.

    Parameters:
        course_id: the Canvas id of the course.

    Returns:
        A list with one entry per enrollment, collected from all of the pages
        of the response; an empty list when the first request fails.
    """
    enrollments = []

    # Use the Canvas API to get the list of users enrolled in this course
    # GET /api/v1/courses/:course_id/enrollments
    url = "{0}/courses/{1}/enrollments".format(baseUrl, course_id)
    r = requests.get(url, params={'per_page': '100'}, headers=header)
    if r.status_code != requests.codes.ok:
        return enrollments

    enrollments.extend(r.json())

    # The response may be paginated, i.e., split into pieces - each returning
    # only some of the list of enrollments; keep following the 'next' link.
    # see "Handling Pagination" - Discussion created by tyler.clair@usu.edu on Apr 27, 2015, https://community.canvaslms.com/thread/1500
    while r.links.get('next', False):
        r = requests.get(r.links['next']['url'], headers=header)
        next_page = r.json()
        if r.status_code == requests.codes.ok:
            enrollments.extend(next_page)

    return enrollments
This code gets the assignments for a course
def list_assignments(course_id):
    """Return all of the assignments of a course.

    Parameters:
        course_id: the Canvas id of the course.

    Returns:
        A list with one entry per assignment, collected from all of the pages
        of the response; an empty list when the first request fails.

    When the module-level Verbose_Flag (set by main()) is true, the text of
    each additional page is printed.
    """
    assignments_found_thus_far = []

    # Use the Canvas API to get the list of assignments for the course
    # GET /api/v1/courses/:course_id/assignments
    url = "{0}/courses/{1}/assignments".format(baseUrl, course_id)
    r = requests.get(url, headers=header)
    if r.status_code != requests.codes.ok:
        return assignments_found_thus_far

    assignments_found_thus_far.extend(r.json())

    # The response may be paginated, i.e., split into pieces - each returning
    # only some of the list of assignments. Follow the 'next' link for as long
    # as there is one: a response need not have 'current' or 'last' links, so
    # indexing them could raise a KeyError.
    # see "Handling Pagination" - Discussion created by tyler.clair@usu.edu on Apr 27, 2015, https://community.canvaslms.com/thread/1500
    while r.links.get('next', False):
        r = requests.get(r.links['next']['url'], headers=header)
        if Verbose_Flag:
            print("result of getting assignments for a paginated response: {}".format(r.text))
        if r.status_code != requests.codes.ok:
            # a failed page carries an error message rather than assignments
            break
        assignments_found_thus_far.extend(r.json())

    return assignments_found_thus_far
This code gets the submissions for a given user for an assignment for a course
def submission_for_assignment_by_user(course_id, assignment_id, user_id):
    """Get one user's submission for a specific assignment in a course.

    Parameters:
        course_id: the Canvas id of the course.
        assignment_id: the Canvas id of the assignment.
        user_id: the Canvas id of the user.

    Returns:
        The submission information as returned by Canvas, or an empty dict
        when the request did not succeed.
    """
    # Use the Canvas API to get a user's submission for a course for a specific assignment
    # GET /api/v1/courses/:course_id/assignments/:assignment_id/submissions/:user_id
    response = requests.get(
        "{0}/courses/{1}/assignments/{2}/submissions/{3}".format(baseUrl, course_id, assignment_id, user_id),
        headers=header)
    if response.status_code != requests.codes.ok:
        return dict()
    return response.json()
The main program
def main():
    """Create a DOCX file for each textual submission to an assignment.

    Command line: [options] course_id assignment_id

    For every student enrolled in the course whose submission has text in its
    'body', a DOCX file is generated locally. With --submit the file is also
    uploaded as a new submission made as that student (unless --testing is
    given, in which case the upload itself is skipped).

    Exits with status 1 when the arguments are missing or the assignment_id is
    not a number.
    """
    global Verbose_Flag

    parser = optparse.OptionParser()

    parser.add_option('-v', '--verbose',
                      dest="verbose",
                      default=False,
                      action="store_true",
                      help="Print lots of output to stdout"
                      )
    parser.add_option('-C', '--containers',
                      dest="containers",
                      default=False,
                      action="store_true",
                      help="for the container enviroment in the virtual machine"
                      )
    parser.add_option('-t', '--testing',
                      dest="testing",
                      default=False,
                      action="store_true",
                      help="execute test code"
                      )
    parser.add_option('-s', '--submit',
                      dest="submit",
                      default=False,
                      action="store_true",
                      help="submit resulting DOCX file as the student"
                      )
    parser.add_option("--config", dest="config_filename",
                      help="read configuration from FILE", metavar="FILE")

    options, remainder = parser.parse_args()

    Verbose_Flag = options.verbose
    if Verbose_Flag:
        print("ARGV : {}".format(sys.argv[1:]))
        print("VERBOSE : {}".format(options.verbose))
        print("REMAINING : {}".format(remainder))
        print("Configuration file : {}".format(options.config_filename))

    initialize(options)

    if len(remainder) < 2:
        print("Insuffient arguments - must provide course_id assignment_id")
        sys.exit(1)

    course_id = remainder[0]
    try:
        # the 'id' of an assignment is a number, see the comparison below
        assignment_id = int(remainder[1])
    except ValueError:
        print("assignment_id must be a number, not {!r}".format(remainder[1]))
        sys.exit(1)

    # Get the information about the assignment and the students in the course
    all_assignments = list_assignments(course_id)

    # check that the assignment is in this course
    assignment = False
    for a in all_assignments:
        if a['id'] == assignment_id:
            assignment = a
            break

    if not assignment:
        print("assignment_id={} not found in this course".format(assignment_id))
        print("The assignments are: {}".format(all_assignments))
        return

    enrollments = users_in_course(course_id)
    # compute a set of user_id for the students in the course
    student_ids = {e['user_id'] for e in enrollments if e['type'] == 'StudentEnrollment'}

    # For every student check if they have a submission and if it has text in it
    for s in student_ids:
        submission_info = submission_for_assignment_by_user(course_id, assignment_id, s)
        # An example of this submission_info is:
        # {'id': 4016,
        #  'body': 'This is a some text for the submission. It is going to contain two paragraphs each of which are multiple lines of text long. Testing 1, 2, and 3.\r\nThis is a second paragraph of text for the submission. It consists of just several sentences. In this case the paragraph goes one and on and on. For no real reason.',
        #  'url': None,
        #  'grade': None,
        #  'score': None,
        #  'submitted_at': '2021-03-01T11:20:21Z',
        #  'assignment_id': 44,
        #  'user_id': 6,
        #  'submission_type': 'online_text_entry',
        #  'workflow_state': 'submitted',
        #  'grade_matches_current_submission': True,
        #  'graded_at': None,
        #  'grader_id': None,
        #  'attempt': 4,
        #  'cached_due_date': None,
        #  'excused': None,
        #  'late_policy_status': None,
        #  'points_deducted': None,
        #  'grading_period_id': None,
        #  'extra_attempts': None,
        #  'late': False,
        #  'missing': False,
        #  'seconds_late': 0,
        #  'entered_grade': None,
        #  'entered_score': None,
        #  'preview_url': 'http://canvas.docker/courses/7/assignments/44/submissions/6?preview=1&version=4',
        #  'anonymous_id': 'FYUmc'}

        # The code then looks at the 'body' to process it into a document:
        # when a submission has been answered with a text entry, the text is
        # placed in the body in HTML format.
        # An empty dict is returned when the submission could not be fetched,
        # hence the use of get().
        submission_body = submission_info.get('body')
        if not submission_body:
            continue

        student = get_user(s)
        if not student:
            # get_user() returns an empty list when the request fails
            print("unable to get information about user_id={}".format(s))
            continue
        print("{0}: submission_info={1}".format(student['sortable_name'], submission_info))

        # The following does all of the magic to create the contents of the docx file
        document = Document('blank.docx')  # start with a blank A4 page document
        normal_style = document.styles['Normal']
        normal_style.font.name = 'Garamond'
        normal_style.font.size = Pt(12)
        normal_style.paragraph_format.space_before = Pt(24)

        about_submission_text = "submission by '{0}' (canvas user_id={1}) in course={2} for assignment={3}, submitted_at={4}".format(student['name'], s, course_id, assignment_id, submission_info['submitted_at'])
        document.add_paragraph(about_submission_text)

        # first the submission as it was entered, i.e., the HTML itself
        heading = document.add_paragraph('')
        heading.add_run('Submission in HTML:').font.highlight_color = WD_COLOR_INDEX.YELLOW
        document.add_paragraph(submission_body)

        # then the submission as a sequence of paragraphs of plain text
        heading = document.add_paragraph('')
        heading.add_run('Submission:').font.highlight_color = WD_COLOR_INDEX.YELLOW
        for bp in get_paragraphs_from_HTML(submission_body):
            # add_paragraph() expects text, while the paragraphs are elements
            # of the parsed HTML - so extract their text
            paragraph_text = bp.get_text() if hasattr(bp, 'get_text') else str(bp)
            document.add_paragraph(paragraph_text)

        document_name = "{0}-{1}-{2}-{3}.docx".format(student['name'], course_id, assignment_id, submission_info['submitted_at'])
        document.save(document_name)
        # (The original page shows a view of such a document at this point.)

        # If the submit option is not specified - do not do any more processing
        # for this student. Otherwise, make sure we can upload a docx file.
        if not options.submit:  # if the option to submit is not given, then just generate the DOCX files (locally)
            continue

        status = enable_docx_submission_for_assignment(course_id, assignment_id, assignment)
        if not status:
            print("unable to enable for docx submission")
            continue

        # Note that if the testing flag is set - do not actually submit the file.
        print("about to submit a file")
        if not options.testing:
            submit_file_for_assignment(course_id, assignment_id, s, document_name)

        # Finally, output the possibly new state of the submission.
        submission_info2 = submission_for_assignment_by_user(course_id, assignment_id, s)
        print("submission_info after submission={}".format(submission_info2))
An example of the state of the submission after the DOCX file has been uploaded:
{'id': 4016,
'body': None,
'url': None,
'grade': None,
'score': None,
'submitted_at': '2021-03-01T12:25:27Z',
'assignment_id': 44,
'user_id': 6,
'submission_type': 'online_upload',
'workflow_state': 'submitted',
'grade_matches_current_submission': True,
'graded_at': None,
'grader_id': None,
'attempt': 5,
'cached_due_date': None,
'excused': None,
'late_policy_status': None,
'points_deducted': None,
'grading_period_id': None,
'extra_attempts': None,
'late': False,
'missing': False,
'seconds_late': 0,
'entered_grade': None,
'entered_score': None,
'preview_url': 'http://canvas.docker/courses/7/assignments/44/submissions/6?preview=1&version=5',
'attachments': [{'id': 85,
'uuid': 'viuGfsbGO0zB68eE3wDhhPsLAWzpHcTL39ULwrLB',
'folder_id': 30,
'display_name': 'Ellen FakeStudent-7-44-2021-03-01T11:20:21Z.docx',
'filename': '1614601526_555__Ellen_FakeStudent-7-44-2021-03-01T11:20:21Z.docx',
'workflow_state': 'processed',
'upload_status': 'success',
'content-type': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'url': 'http://canvas.docker/files/85/download?download_frd=1&verifier=viuGfsbGO0zB68eE3wDhhPsLAWzpHcTL39ULwrLB',
'size': 10257,
'created_at': '2021-03-01T12:25:26Z',
'updated_at': '2021-03-01T12:25:27Z',
'unlock_at': None,
'locked': False,
'hidden': False,
'lock_at': None,
'hidden_for_user': False,
'thumbnail_url': None,
'modified_at': '2021-03-01T12:25:26Z',
'mime_class': 'doc',
'media_entry_id': None,
'locked_for_user': False,
'preview_url': None}],
'anonymous_id': 'FYUmc'}
Invoke the function main().
# run main() only when this file is executed as a program (not when imported)
if __name__ == "__main__":
    main()