New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fixes agent merging failing unexpectedly #4699
base: production
Are you sure you want to change the base?
Conversation
@@ -308,7 +308,7 @@ def update_record(record: models.Model): | |||
|
|||
response: http.HttpResponse = update_record(obj) | |||
if response is not None and response.status_code != 204: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@CarolineDenis thanks for fixing this!
@acwhite211 on second thought, do we even need to handle response from update_record at all? I added handling the invalid http responses through FailedMergingException to preserve atomicity, which gets caught by the caller of this function. See here.
So, we can get rid of all the HTTP responses [here]. (
specify7/specifyweb/specify/record_merging.py
Lines 150 to 350 in 0751046
def record_merge_fx(model_name: str, old_model_ids: List[int], new_model_id: int, | |
progress: Optional[Progress]=None, | |
new_record_info: Dict[str, Any]=None) -> http.HttpResponse: | |
"""Replaces all the foreign keys referencing the old record ID | |
with the new record ID, and deletes the old record. | |
""" | |
# Confirm the target model table exists | |
model_name = model_name.lower().title() | |
target_model = getattr(spmodels, model_name) | |
if target_model is None: | |
raise FailedMergingException(http.HttpResponseNotFound("model_name: " + model_name + "does not exist.")) | |
# Check to make sure both the old and new agent IDs exist in the table | |
if not target_model.objects.filter(id=new_model_id).select_for_update().exists(): | |
raise FailedMergingException(http.HttpResponseNotFound(model_name + "ID: " + str(new_model_id) + " does not exist.")) | |
for old_model_id in old_model_ids: | |
if not target_model.objects.filter(id=old_model_id).select_for_update().exists(): | |
raise FailedMergingException(http.HttpResponseNotFound(model_name + "ID: " + str(old_model_id) + " does not exist.")) | |
# Get dependent fields and objects of the target object | |
target_object = target_model.objects.get(id=new_model_id) | |
dependant_relationships = [(rel.relatedModelName, rel.name) | |
for rel in target_object.specify_model.relationships | |
if api.is_dependent_field(target_object, rel.name)] | |
dependant_table_names = set([rel[0] for rel in dependant_relationships]) | |
# Get all of the columns in all of the tables of specify the are foreign keys referencing model ID | |
foreign_key_cols = [] | |
for table in spmodels.datamodel.tables: | |
for relationship in table.relationships: | |
if relationship.relatedModelName.lower() == model_name.lower(): | |
foreign_key_cols.append((table.name, relationship.name)) | |
progress(0, len(foreign_key_cols)) if progress is not None else None | |
# Build query to update all of the records with foreign keys referencing the model ID | |
for table_name, column_names in groupby(foreign_key_cols, lambda x: x[0]): | |
foreign_table = spmodels.datamodel.get_table(table_name) | |
if foreign_table is None: | |
continue | |
try: | |
foreign_model = getattr(spmodels, table_name.lower().title()) | |
except ValueError: | |
continue | |
# Handle case of updating a large amount of record ids in a foreign table. | |
# Example: handle case of updating a large amount of agent ids in the audit logs. | |
# Fix by optimizing the query by consolidating it here | |
if model_name.lower() in MERGING_OPTIMIZATION_FIELDS and \ | |
table_name.lower() in MERGING_OPTIMIZATION_FIELDS[model_name.lower()]: | |
for field_name in MERGING_OPTIMIZATION_FIELDS[model_name.lower()][table_name.lower()]: | |
query = Q(**{field_name: old_model_ids[0]}) | |
for old_model_id in old_model_ids[1:]: | |
query.add(Q(**{field_name: old_model_id}), Q.OR) | |
foreign_model.objects.filter(query).update(**{field_name: new_model_id}) | |
progress(1, 0) if progress is not None else None | |
continue | |
apply_order = add_ordering_to_key(table_name.lower().title()) | |
# BUG: timestampmodified could be null for one record, and not the other | |
new_key_fields = ('timestampcreated', 'timestampmodified', 'id') \ | |
if foreign_table.get_field('timestampCreated') is not None \ | |
else () # Consider using id here | |
key_function = lambda x: apply_order(x, new_key_fields) | |
for col in [c[1] for c in column_names]: | |
progress(1, 0) if progress is not None else None | |
# Determine the field name to filter on | |
field_name = col.lower() | |
field_name_id = f'{field_name}_id' | |
if not hasattr(foreign_model, field_name_id): | |
continue | |
# Filter the objects in the foreign model that references the old target model | |
foreign_objects = filter_and_lock_target_objects(foreign_model, old_model_ids, field_name_id) | |
# Update and save the foreign model objects with the new_model_id. | |
# Locking foreign objects in the beginning because another transaction could update records, and we will | |
# then either overwrite or delete that change if we iterate to it much later. | |
for obj in foreign_objects: | |
# If it is a dependent field, delete the object instead of updating it. | |
# This is done in order to avoid duplicates | |
if table_name in dependant_table_names: | |
# Note: need to handle case where deletion throws error because it is referenced my other records | |
try: | |
clean_fields_pre_delete(obj) | |
obj.delete() | |
except ProtectedError as e: | |
# NOTE: Handle ProtectedError in the future. | |
# EXAMPLE: ProtectedError: ("Cannot delete some instances of model 'Address' because they are | |
# referenced through protected foreign keys: | |
# 'Division.address'.", {<Division: Division object (2)>}) | |
raise | |
continue | |
# Set new value for the field | |
setattr(obj, field_name_id, new_model_id) | |
def record_merge_recur(row_to_lock=None): | |
""" Recursively run another merge process to resolve uniqueness constraints. | |
TODO: Add more sanity checks here. | |
An important, and hard to catch case being missed: | |
Between the exception being raised, and record_merge_recur setting a lock, another transaction | |
could alter the row, and cause the uniqueness constraint to be invalid. In this case, we would | |
delete a record that we didn't need to. | |
""" | |
# Probably could lock more rows than needed. | |
# We immediately rollback if more than 1, so this is fine. | |
foreign_record_lst = filter_and_lock_target_objects(foreign_model, row_to_lock, 'id') \ | |
if row_to_lock is not None \ | |
else foreign_model.objects.filter(**{field_name_id: new_model_id}).select_for_update() | |
foreign_record_count = foreign_record_lst.count() | |
if foreign_record_count > 1: | |
# NOTE: Maybe try handling multiple possible row that are potentially causes the conflict. | |
# Would have to go through all constraints and check records based on columns in each constraint. | |
# This case probably is no longer needed to be handled since records are fetched by primary | |
# keys now, and uniqueness constraints are handled via business exceptions. | |
raise FailedMergingException(http.HttpResponseNotAllowed( | |
'Error! Multiple records violating uniqueness constraints in ' + table_name)) | |
# Determine which of the records will be assigned as old and new with the timestampcreated field | |
old_record = obj | |
new_record = foreign_record_lst.first() | |
old_record, new_record = sorted([old_record, new_record], key=key_function) | |
# Make a recursive call to record_merge to resolve duplication error | |
response = record_merge_fx(table_name, [old_record.pk], new_record.pk, progress) | |
if old_record.pk != obj.pk: | |
update_record(new_record) | |
return response | |
def update_record(record: models.Model): | |
try: | |
# TODO: Handle case where this obj has been deleted from recursive merge | |
with transaction.atomic(): | |
record.save(update_fields=[field_name_id]) | |
except (IntegrityError, BusinessRuleException) as e: | |
# Catch duplicate error and recursively run record merge | |
rows_to_lock = None | |
if isinstance(e, BusinessRuleException) \ | |
and 'must have unique' in str(e) \ | |
and e.args[1]['table'].lower() == table_name.lower(): | |
# Sanity check because rows can be deleted | |
rows_to_lock = e.args[1]['conflicting'] | |
return record_merge_recur(rows_to_lock) | |
# As long as business rules are updated, this shouldn't be raised. | |
# Still having it for completeness | |
elif e.args[0] == 1062 and "Duplicate" in str(e): | |
return record_merge_recur() | |
else: | |
raise | |
response: http.HttpResponse = update_record(obj) | |
if response is not None and response.status_code != 204: | |
return response | |
# Dedupe by deleting the record that is being replaced and updating the old model ID to the new one | |
for old_model_id in old_model_ids: | |
target_model.objects.get(id=old_model_id).delete() | |
# Update new record with json info, if given | |
has_new_record_info = new_record_info is not None | |
if has_new_record_info and 'new_record_data' in new_record_info and \ | |
new_record_info['new_record_data'] is not None: | |
try: | |
for table_name, _field_name in dependant_relationships: | |
# minor optimization to not fetch unnecessary dependent resources | |
if not table_name.lower().endswith('attachment'): | |
continue | |
field_name = _field_name.lower() | |
# put_resource will drop existing dependent resources. | |
# this will trigger deletion from asset server. | |
# so, cleaning fields here. It does this for all | |
# attachments, which is fine since we just use | |
# whatever front-end sends as the final data | |
[clean_fields_pre_delete(dependent_object) | |
for dependent_object in getattr(target_object, field_name).all() | |
] | |
new_record_data = new_record_info['new_record_data'] | |
target_table = spmodels.datamodel.get_table(model_name.lower()) | |
fix_orderings(target_table, new_record_data) | |
obj = api.put_resource(new_record_info['collection'], | |
new_record_info['specify_user'], | |
model_name, | |
new_model_id, | |
new_record_info['version'], | |
fix_record_data(new_record_data, target_table, target_table.name.lower(), new_model_id, old_model_ids)) | |
except IntegrityError as e: | |
# NOTE: Handle IntegrityError Duplicate entry in the future. | |
# EXAMPLE: IntegrityError: (1062, "Duplicate entry '1-0' for key 'AgentID'") | |
raise | |
# Return http response | |
return http.HttpResponse('', status=204) |
Here, now we define the 204 http in the try block, and return that.
specify7/specifyweb/specify/record_merging.py
Lines 45 to 64 in 0751046
def resolve_record_merge_response(start_function, silent=True): | |
try: | |
response = start_function() | |
except Exception as error: | |
# FEATURE: Add traceback here | |
if isinstance(error, FailedMergingException): | |
logger.info('FailedMergingException') | |
logger.info(error.args[0]) | |
logger.info(traceback.format_exc()) | |
response = error.args[0] | |
elif silent: | |
logger.info(traceback.format_exc()) | |
return http.HttpResponseServerError(content=str(traceback.format_exc()), content_type="application/json") | |
elif type(error.args[0]) == type(http.HttpResponseNotFound): | |
logger.info('HttpResponseNotFound') | |
logger.info(error.args[0]) | |
response = error.args[0] | |
else: | |
raise | |
return response |
I believe the testing instructions for record merging should be more detailed to ensure it functions correctly in various scenarios, not just this one. |
Fixes #4685
Checklist
and self-explanatory (or properly documented)
Testing instructions
use herb32124 db
merge two agents
verify it works
merge any other agents with any other db and verify it works as expected