Skip to content

Commit 81d3567

Browse files
committed
chore: remove the unnecessary XML parsing code from HTML XBlock
- HTML XBlock extends from the `LegacyXmlMixin` - It will use the parsing functionality from this mixin.
1 parent 905542c commit 81d3567

File tree

1 file changed

+4
-264
lines changed

1 file changed

+4
-264
lines changed

xblocks_contrib/html/html.py

Lines changed: 4 additions & 264 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
"""
55

66
import copy
7-
import json
87
import logging
98
import os
109
import re
@@ -16,35 +15,23 @@
1615
from django.utils.translation import gettext_noop as _
1716
from fs.errors import ResourceNotFound
1817
from lxml import etree
19-
from lxml.etree import ElementTree, XMLParser
18+
from lxml.etree import XMLParser
2019
from opaque_keys.edx.keys import CourseKey, UsageKey
2120
from opaque_keys.edx.locator import LibraryLocatorV2
2221
from path import Path as path
2322
from web_fragments.fragment import Fragment
2423
from xblock.core import XML_NAMESPACES, XBlock
25-
from xblock.fields import Boolean, Dict, Scope, ScopeIds, String, UserScope
24+
from xblock.fields import Boolean, Scope, ScopeIds, String, UserScope
2625
from xblock.utils.resources import ResourceLoader
2726

28-
from xblocks_contrib.common.xml_utils import (
29-
apply_pointer_attributes,
30-
deserialize_field,
31-
format_filepath,
32-
is_pointer_tag,
33-
load_definition_xml,
34-
name_to_pathname,
35-
own_metadata,
36-
serialize_field,
37-
)
27+
from xblocks_contrib.common.xml_utils import LegacyXmlMixin, name_to_pathname
3828

3929
log = logging.getLogger(__name__)
4030
resource_loader = ResourceLoader(__name__)
4131

4232
# The global (course-agnostic) anonymous user ID for the user.
4333
ATTR_KEY_DEPRECATED_ANONYMOUS_USER_ID = "edx-platform.deprecated_anonymous_user_id"
4434

45-
# assume all XML files are persisted as utf-8.
46-
EDX_XML_PARSER = XMLParser(dtd_validation=False, load_dtd=False, remove_blank_text=True, encoding="utf-8")
47-
4835

4936
class MLStripper(HTMLParser):
5037
"helper function for html_to_text below"
@@ -157,7 +144,7 @@ def stringify_children(node):
157144
# This makes our block more resilient. It won't crash in test environments
158145
# where the user service might not be available.
159146
@XBlock.wants("user")
160-
class HtmlBlock(XBlock):
147+
class HtmlBlock(LegacyXmlMixin, XBlock):
161148
"""
162149
The HTML XBlock.
163150
"""
@@ -191,47 +178,9 @@ class HtmlBlock(XBlock):
191178
ENABLE_HTML_XBLOCK_STUDENT_VIEW_DATA = "ENABLE_HTML_XBLOCK_STUDENT_VIEW_DATA"
192179

193180
uses_xmodule_styles_setup = True
194-
filename_extension = "xml"
195181
template_dir_name = "html"
196182
show_in_read_only_mode = True
197183

198-
xml_attributes = Dict(
199-
help="Map of unhandled xml attributes, used only for storage between import and export",
200-
default={},
201-
scope=Scope.settings,
202-
)
203-
metadata_to_strip = (
204-
"data_dir",
205-
"tabs",
206-
"grading_policy",
207-
"discussion_blackouts",
208-
# VS[compat]
209-
# These attributes should have been removed from here once all 2012-fall courses imported into
210-
# the CMS and "inline" OLX format deprecated. But, it never got deprecated. Moreover, it's
211-
# widely used to this date. So, we still have to strip them. Also, removing of "filename"
212-
# changes OLX returned by `/api/olx-export/v1/xblock/{block_id}/`, which indicates that some
213-
# places in the platform rely on it.
214-
"course",
215-
"org",
216-
"url_name",
217-
"filename",
218-
# Used for storing xml attributes between import and export, for roundtrips
219-
"xml_attributes",
220-
# Used by _import_xml_node_to_parent in cms/djangoapps/contentstore/helpers.py to prevent
221-
# XmlMixin from treating some XML nodes as "pointer nodes".
222-
"x-is-pointer-node",
223-
)
224-
225-
# This is a categories to fields map that contains the block category specific fields which should not be
226-
# cleaned and/or override while adding xml to node.
227-
metadata_to_not_to_clean = {
228-
# A category `video` having `sub` and `transcripts` fields
229-
# which should not be cleaned/override in an xml object.
230-
"video": ("sub", "transcripts")
231-
}
232-
233-
metadata_to_export_to_policy = ("discussion_topics",)
234-
235184
@property
236185
def category(self):
237186
return self.scope_ids.block_type
@@ -485,20 +434,6 @@ def workbench_scenarios():
485434
),
486435
]
487436

488-
@classmethod
489-
def clean_metadata_from_xml(cls, xml_object, excluded_fields=()):
490-
"""
491-
Remove any attribute named for a field with scope Scope.settings from the supplied
492-
xml_object
493-
"""
494-
for field_name, field in cls.fields.items(): # pylint: disable=no-member
495-
if (
496-
field.scope == Scope.settings
497-
and field_name not in excluded_fields
498-
and xml_object.get(field_name) is not None
499-
):
500-
del xml_object.attrib[field_name]
501-
502437
# NOTE: html descriptors are special. We do not want to parse and
503438
# export them ourselves, because that can break things (e.g. lxml
504439
# adds body tags when it exports, but they should just be html
@@ -573,140 +508,6 @@ def load_definition(cls, xml_object, system, location, id_generator): # pylint:
573508
# add more info and re-raise
574509
raise Exception(msg).with_traceback(sys.exc_info()[2])
575510

576-
@classmethod
577-
def load_metadata(cls, xml_object):
578-
"""
579-
Read the metadata attributes from this xml_object.
580-
581-
Returns a dictionary {key: value}.
582-
"""
583-
metadata = {"xml_attributes": {}}
584-
for attr, val in xml_object.attrib.items():
585-
586-
if attr in cls.metadata_to_strip:
587-
# don't load these
588-
continue
589-
590-
if attr not in cls.fields: # pylint: disable=unsupported-membership-test
591-
metadata["xml_attributes"][attr] = val
592-
else:
593-
metadata[attr] = deserialize_field(cls.fields[attr], val) # pylint: disable=unsubscriptable-object
594-
return metadata
595-
596-
@classmethod
597-
def apply_policy(cls, metadata, policy):
598-
"""
599-
Add the keys in policy to metadata, after processing them
600-
through the attrmap. Updates the metadata dict in place.
601-
"""
602-
for attr, value in policy.items():
603-
if attr not in cls.fields: # pylint: disable=unsupported-membership-test
604-
# Store unknown attributes coming from policy.json
605-
# in such a way that they will export to xml unchanged
606-
metadata["xml_attributes"][attr] = value
607-
else:
608-
metadata[attr] = value
609-
610-
@classmethod
611-
def parse_xml(cls, node, runtime, keys):
612-
"""
613-
Use `node` to construct a new block.
614-
615-
Arguments:
616-
node (etree.Element): The xml node to parse into an xblock.
617-
618-
runtime (:class:`.Runtime`): The runtime to use while parsing.
619-
620-
keys (:class:`.ScopeIds`): The keys identifying where this block
621-
will store its data.
622-
623-
Returns (XBlock): The newly parsed XBlock
624-
625-
"""
626-
627-
if keys is None:
628-
# Passing keys=None is against the XBlock API but some platform tests do it.
629-
def_id = runtime.id_generator.create_definition(node.tag, node.get("url_name"))
630-
keys = ScopeIds(None, node.tag, def_id, runtime.id_generator.create_usage(def_id))
631-
aside_children = []
632-
633-
# Let the runtime construct the block. It will have a proper, inheritance-aware field data store.
634-
block = runtime.construct_xblock_from_class(cls, keys)
635-
636-
# VS[compat]
637-
# In 2012, when the platform didn't have CMS, and all courses were handwritten XML files, problem tags
638-
# contained XML problem descriptions withing themselves. Later, when Studio has been created, and "pointer" tags
639-
# became the preferred problem format, edX has to add this compatibility code to 1) support both pre- and
640-
# post-Studio course formats simulteneously, and 2) be able to migrate 2012-fall courses to Studio. Old style
641-
# support supposed to be removed, but the deprecation process have never been initiated, so this
642-
# compatibility must stay, probably forever.
643-
if is_pointer_tag(node):
644-
# new style:
645-
# read the actual definition file--named using url_name.replace(':','/')
646-
definition_xml, filepath = load_definition_xml(node, runtime, keys.def_id)
647-
aside_children = runtime.parse_asides(definition_xml, keys.def_id, keys.usage_id, runtime.id_generator)
648-
else:
649-
filepath = None
650-
definition_xml = node
651-
652-
# Note: removes metadata.
653-
definition, children = cls.load_definition(definition_xml, runtime, keys.def_id, runtime.id_generator)
654-
655-
# VS[compat]
656-
# Make Ike's github preview links work in both old and new file layouts.
657-
if is_pointer_tag(node):
658-
# new style -- contents actually at filepath
659-
definition["filename"] = [filepath, filepath]
660-
661-
metadata = cls.load_metadata(definition_xml)
662-
663-
# move definition metadata into dict
664-
dmdata = definition.get("definition_metadata", "")
665-
if dmdata:
666-
metadata["definition_metadata_raw"] = dmdata
667-
try:
668-
metadata.update(json.loads(dmdata))
669-
except Exception as err: # lint-amnesty, pylint: disable=broad-except
670-
log.debug("Error in loading metadata %r", dmdata, exc_info=True)
671-
metadata["definition_metadata_err"] = str(err)
672-
673-
definition_aside_children = definition.pop("aside_children", None)
674-
if definition_aside_children:
675-
aside_children.extend(definition_aside_children)
676-
677-
# Set/override any metadata specified by policy
678-
cls.apply_policy(metadata, runtime.get_policy(keys.usage_id))
679-
680-
field_data = {**metadata, **definition}
681-
682-
for field_name, value in field_data.items():
683-
# The 'xml_attributes' field has a special setter logic in its Field class,
684-
# so we must handle it carefully to avoid duplicating data.
685-
if field_name == "xml_attributes":
686-
# The 'filename' attribute is specially handled for git links.
687-
value["filename"] = definition.get("filename", ["", None])
688-
block.xml_attributes.update(value)
689-
elif field_name in block.fields:
690-
setattr(block, field_name, value)
691-
692-
block.children = children
693-
694-
if aside_children:
695-
cls.add_applicable_asides_to_block(block, runtime, aside_children)
696-
697-
return block
698-
699-
@classmethod
700-
def add_applicable_asides_to_block(cls, block, runtime, aside_children):
701-
"""
702-
Add asides to the block. Moved this out of the parse_xml method to use it in the VideoBlock.parse_xml
703-
"""
704-
asides_tags = [aside_child.tag for aside_child in aside_children]
705-
asides = runtime.get_asides(block)
706-
for aside in asides:
707-
if aside.scope_ids.block_type in asides_tags:
708-
block.add_aside(aside)
709-
710511
@classmethod
711512
def parse_xml_new_runtime(cls, node, runtime, keys):
712513
"""
@@ -721,67 +522,6 @@ def parse_xml_new_runtime(cls, node, runtime, keys):
721522
cls._set_field_if_present(block, name, value, {})
722523
return block
723524

724-
def export_to_file(self):
725-
"""If this returns True, write the definition of this block to a separate
726-
file.
727-
728-
NOTE: Do not override this without a good reason. It is here
729-
specifically for customtag...
730-
"""
731-
return True
732-
733-
def add_xml_to_node(self, node):
734-
"""For exporting, set data on `node` from ourselves."""
735-
xml_object = self.definition_to_xml(self.runtime.export_fs)
736-
if xml_object is None:
737-
return
738-
739-
for aside in self.runtime.get_asides(self):
740-
if aside.needs_serialization():
741-
aside_node = etree.Element("unknown_root", nsmap=XML_NAMESPACES)
742-
aside.add_xml_to_node(aside_node)
743-
xml_object.append(aside_node)
744-
745-
not_to_clean_fields = self.metadata_to_not_to_clean.get(self.category, ())
746-
self.clean_metadata_from_xml(xml_object, excluded_fields=not_to_clean_fields)
747-
xml_object.tag = self.category
748-
node.tag = self.category
749-
750-
for attr in sorted(own_metadata(self)):
751-
if (
752-
attr not in self.metadata_to_strip
753-
and attr not in self.metadata_to_export_to_policy
754-
and attr not in not_to_clean_fields
755-
):
756-
# pylint: disable=unsubscriptable-object
757-
val = serialize_field(self.fields[attr].to_json(getattr(self, attr)))
758-
try:
759-
xml_object.set(attr, val)
760-
except Exception: # pylint: disable=broad-exception-caught
761-
logging.exception("Failed to serialize metadata attribute %s in module %s.", attr, self.url_name)
762-
763-
for key, value in self.xml_attributes.items():
764-
if key not in self.metadata_to_strip:
765-
xml_object.set(key, serialize_field(value))
766-
767-
if self.export_to_file():
768-
url_path = name_to_pathname(self.url_name)
769-
filepath = format_filepath(
770-
self.category, self.location.run if self.category == "course" else url_path
771-
)
772-
self.runtime.export_fs.makedirs(os.path.dirname(filepath), recreate=True)
773-
with self.runtime.export_fs.open(filepath, "wb") as fileobj:
774-
ElementTree(xml_object).write(fileobj, pretty_print=True, encoding="utf-8")
775-
else:
776-
node.clear()
777-
node.tag = xml_object.tag
778-
node.text = xml_object.text
779-
node.tail = xml_object.tail
780-
node.attrib.update(xml_object.attrib)
781-
node.extend(xml_object)
782-
783-
apply_pointer_attributes(node, self)
784-
785525
def definition_to_xml(self, resource_fs):
786526
"""
787527
Returns an lxml Element representing the definition of this block.

0 commit comments

Comments
 (0)