From bddb3d7fd63f0be6239938eabae39d284074ba6a Mon Sep 17 00:00:00 2001 From: Zafiris Galanopoulos Date: Thu, 30 May 2019 00:17:02 +0100 Subject: [PATCH 1/2] Adding experimental support for Mathesis https://mathesis.cup.gr --- edx_dl/parsing.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/edx_dl/parsing.py b/edx_dl/parsing.py index 5e50d354..fbe892db 100644 --- a/edx_dl/parsing.py +++ b/edx_dl/parsing.py @@ -414,7 +414,9 @@ def get_page_extractor(url): """ if ( url.startswith('https://courses.edx.org') or - url.startswith('https://mitxpro.mit.edu') + url.startswith('https://mitxpro.mit.edu') or + url.startswith('https://mathesis.cup.gr') + ): return NewEdXPageExtractor() elif ( From d1cbb7afd5590966b2a2ba6f8fca76a6a7a2ec34 Mon Sep 17 00:00:00 2001 From: Zafiris Galanopoulos Date: Thu, 30 May 2019 00:20:24 +0100 Subject: [PATCH 2/2] When using the Mathesis platform, avoid ASCII filenames and directory names, as the content is in Greek. --- edx_dl/edx_dl.py | 18 +++++++++++++++--- edx_dl/utils.py | 4 ++-- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/edx_dl/edx_dl.py b/edx_dl/edx_dl.py index 64486674..1482d5d9 100644 --- a/edx_dl/edx_dl.py +++ b/edx_dl/edx_dl.py @@ -93,6 +93,10 @@ 'bits':{ 'url':'http://any-learn.bits-pilani.ac.in', 'courseware-selector': ('nav', {'aria-label': 'Course Navigation'}), + }, + 'mathesis':{ + 'url': 'https://mathesis.cup.gr', + 'courseware-selector': ('nav', {'aria-label': 'Course Navigation'}), } } BASE_URL = OPENEDX_SITES['edx']['url'] @@ -409,7 +413,6 @@ def parse_args(): return args - def edx_get_headers(): """ Build the Open edX headers to create future requests. @@ -828,11 +831,20 @@ def download(args, selections, all_units, headers): # sections/subsections to add correct prefixes and show nicer information. 
for selected_course, selected_sections in selections.items(): - coursename = directory_name(selected_course.name) + # If the platform is mathesis don't convert the directory name to ascii + if args.platform == 'mathesis': + coursename = directory_name(selected_course.name, dont_use_ascii=True) + else: + coursename = directory_name(selected_course.name) for selected_section in selected_sections: section_dirname = "%02d-%s" % (selected_section.position, selected_section.name) - target_dir = os.path.join(args.output_dir, coursename, + # If our platform is mathesis, then the filename contains greek chars, so clean the bare minimum + if args.platform == 'mathesis': + target_dir = os.path.join(args.output_dir, coursename, + clean_filename(section_dirname, minimal_change=True)) + else: + target_dir = os.path.join(args.output_dir, coursename, clean_filename(section_dirname)) mkdir_p(target_dir) counter = 0 diff --git a/edx_dl/utils.py b/edx_dl/utils.py index 0ec44718..220d4d2e 100644 --- a/edx_dl/utils.py +++ b/edx_dl/utils.py @@ -42,11 +42,11 @@ def execute_command(cmd, args): raise e -def directory_name(initial_name): +def directory_name(initial_name, dont_use_ascii=False): """ Transform the name of a directory into an ascii version """ - result = clean_filename(initial_name) + result = clean_filename(initial_name, minimal_change=dont_use_ascii) return result if result != "" else "course_folder"