Skip to content
Open
Show file tree
Hide file tree
Changes from 49 commits
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
2a5eda8
fix:reverse_hide_component_fields to be set to Falses
CarolineDenis May 29, 2026
e8466af
fix: Do not dedupe localized strings across all languages
CarolineDenis May 29, 2026
25bf8b2
fix: Simplify MultipleObjectsReturned
CarolineDenis May 29, 2026
063a7c6
Fix: Use db_alias for migration db connexion
CarolineDenis May 29, 2026
6cc5ffa
Fix: flips existing row back to isDatabaseConstraint=True
CarolineDenis May 29, 2026
1ee0f1e
@CarolineDenis
CarolineDenis May 29, 2026
f0dd766
Fix: verify existing user permissions
CarolineDenis May 29, 2026
6dfe4eb
Fix: Remove the use of f-string
CarolineDenis May 29, 2026
8893687
fix: prevent incorrect reuse when pairing tree defs and disciplines
CarolineDenis May 29, 2026
32e2972
fix: remove exclusion so partially migrated disciplines are repaired
CarolineDenis May 29, 2026
6d1403c
Fix: Update splocalecontainer items
CarolineDenis May 29, 2026
d60939d
Fix: Remove shadowing import in geo migration
CarolineDenis May 29, 2026
470d99d
Fix: Revert relative age migration instead of applying again
CarolineDenis May 29, 2026
befb73b
Fix fix order of revert migration in tectonic migration
CarolineDenis May 29, 2026
8a1e35b
Fix: Indentation
CarolineDenis May 29, 2026
82bb520
Fix: Use deterministic ordering before positional pairing
CarolineDenis May 29, 2026
8edb112
Fix: Log only on debug
CarolineDenis May 29, 2026
efc8228
Fix: Fix age type
CarolineDenis May 29, 2026
2366664
Fix: Improve logger
CarolineDenis May 29, 2026
4c0f9f8
Fix: Remove unecessary param in def migration
CarolineDenis May 29, 2026
fac87e9
Fix: Remove projects from legacy tests
CarolineDenis May 29, 2026
c8663e9
Fix: Change settings import
CarolineDenis May 29, 2026
a940612
Fix:Incomplete field initialization in conditional createt
CarolineDenis May 29, 2026
4e1d45a
Fix: Incomplete field initialization in conditional create
CarolineDenis May 29, 2026
e228051
Fix: Put model name in lower case for consistency and reversability
CarolineDenis May 29, 2026
aff97b8
Fix: Revert chnages
CarolineDenis May 29, 2026
1e13637
Fix: Remove double id rank
CarolineDenis May 29, 2026
624deee
Fix: Indentation
CarolineDenis May 29, 2026
cd740a7
Fix: Add strict=False to allow diff length
CarolineDenis May 29, 2026
05f6298
Fix: Remove duplicate
CarolineDenis May 29, 2026
cf79196
Fix: Remove using from geo migration
CarolineDenis May 29, 2026
c341f55
Fix: Bring back legacy project test
CarolineDenis May 29, 2026
72414b6
Fix: Remove values_list() in favor of .all
CarolineDenis May 29, 2026
7112c84
fix: stop shadowing catnum unique in collection function for migration
melton-jason Jun 2, 2026
eda65ab
refactor: improve readability of condition when enabling editable uni…
melton-jason Jun 2, 2026
6e37e1e
fix: simplify fix_global_rules helper function
melton-jason Jun 3, 2026
48face1
fix: use manager over base manager for tree patches
melton-jason Jun 3, 2026
fdfe21b
fix: remove check for old newly created DBs in specify 7
melton-jason Jun 3, 2026
96bf0cc
fix: bring back is_sp6_user_permissions_migrated
melton-jason Jun 3, 2026
cfad41f
fix: change ordering of admin checks to evaluate more common first
melton-jason Jun 3, 2026
2e1835e
fix: remove unused query
melton-jason Jun 3, 2026
f4d6491
refactor: use defaults in get_or_create for Roles
melton-jason Jun 3, 2026
f792d5e
chore: add refactor note
melton-jason Jun 3, 2026
91aaf38
refactor: move DEBUG check into log_sqlalchemy_query
melton-jason Jun 3, 2026
6b4d796
fix: remove unused log_sqlalchemy_query function
melton-jason Jun 3, 2026
ab8320b
fix: incorrect Discipline -> tectonicunittreedef pairing when resolvi…
melton-jason Jun 3, 2026
4196c1e
refactor: optimize fix_taxon_treedef_discipline_links to one query
melton-jason Jun 3, 2026
6e1902e
refactor: move fix_tectonic_links to TectonicUnit file
melton-jason Jun 3, 2026
39ec012
fix: forward tectonic unit migration functions
melton-jason Jun 4, 2026
9f9df79
fix: use Migrator user for all run_key_migration operations
melton-jason Jun 5, 2026
a95bc71
perf: reduce set_discipline_for_taxon_treedefs to single DB hit
melton-jason Jun 5, 2026
1136e52
refactor: collapse default tectonicunit ranks to tuple
melton-jason Jun 5, 2026
5418c64
fix: simplify and move reverse tectonic root node migration
melton-jason Jun 5, 2026
419d69b
fix: handle case when businessrule app is not ready but migrations ar…
melton-jason Jun 5, 2026
19f5c1a
refactor: move reverse migration to migration file
melton-jason Jun 5, 2026
9ddf13e
fix: account for custom Tectonic Trees when reverting migration
melton-jason Jun 5, 2026
3396fe4
fix: handle case when root node is synonymized in reverse migration
melton-jason Jun 8, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 11 additions & 51 deletions specifyweb/backend/businessrules/migration_utils.py
Original file line number Diff line number Diff line change
@@ -1,71 +1,31 @@
from typing import Tuple, List

from specifyweb.backend.businessrules.uniqueness_rules import create_uniqueness_rule


def catnum_rule_editable(apps, schema_editor=None):
""" Find any CollectionObject catalogNumber must be unique to Collection
rules which are readonly on the frontend (have isDatabaseConstraint=True)
and set their isDatabaseConstraint=False.

Generally should be run only after migration businessrules/0003 has been
applied
"""
UniquenessRule = apps.get_model("businessrules", "UniquenessRule")

model_rules = UniquenessRule.objects.filter(modelName="Collectionobject", isDatabaseConstraint=True)

catalog_number_rules: List[int] = []
for rule in model_rules:
model_rules = UniquenessRule.objects.filter(
modelName="Collectionobject",
isDatabaseConstraint=True
)

catalog_number_rules: list[int] = []
for rule in model_rules:
rule_fields = rule.uniquenessrulefield_set.all()

fields = rule_fields.filter(isScope=False)
scopes = rule_fields.filter(isScope=True)

# We're only interested in the rule "CollectionObject catalogNumber
# We're only interested in the rule "CollectionObject catalogNumber
# must be unique to Collection"
# We check for length of fields and scopes because get() raises an
# We check for length of fields and scopes because get() raises an
# exception if more than one result is returned
if (len(fields) == 1 and len(scopes) == 1) and (fields.get().fieldPath.lower() == "catalognumber" and scopes.get().fieldPath.lower() == "collection"):
catalog_number_rules.append(rule.id)

rules_to_update = UniquenessRule.objects.filter(id__in=catalog_number_rules)
rules_to_update.update(isDatabaseConstraint=False)


def catnum_rule_uneditable(apps, schema_editor=None):
""" Find any CollectionObject catalogNumber must be unique to Collection
rules which are editable on the frontend (have isDatabaseConstraint=False)
and set their isDatabaseConstraint=True.

Generally should be run when migration businessrules/0003 is being reverted
"""
Discipline = apps.get_model("specify", "Discipline")
UniquenessRule = apps.get_model("businessrules", "UniquenessRule")

for discipline in Discipline.objects.all():
model_rules = UniquenessRule.objects.filter(modelName="Collectionobject", discipline_id=discipline.id, isDatabaseConstraint=False)

has_catalognumber_rule = False
for rule in model_rules:
rule_fields = rule.uniquenessrulefield_set.all()

fields = rule_fields.filter(isScope=False)
scopes = rule_fields.filter(isScope=True)

# We're only interested in the rule "CollectionObject catalogNumber
# must be unique to Collection"
# We check for length of fields and scopes because get() raises an
# exception if more than one result is returned
if (len(fields) == 1 and len(scopes) == 1) and (fields.get().fieldPath.lower() == "catalognumber" and scopes.get().fieldPath.lower() == "collection"):
has_catalognumber_rule = True

if not has_catalognumber_rule:
create_uniqueness_rule(
model_name="Collectionobject",
discipline=discipline,
is_database_constraint=True,
fields=["catalogNumber"],
scopes=["collection"],
registry=apps,
)
Original file line number Diff line number Diff line change
@@ -1,48 +1,64 @@
from django.db import migrations

from specifyweb.backend.businessrules.migration_utils import catnum_rule_editable, catnum_rule_uneditable
from specifyweb.backend.businessrules.migration_utils import catnum_rule_editable
from specifyweb.backend.businessrules.uniqueness_rules import create_uniqueness_rule


def catnum_rule_editable(apps, schema_editor):
UniquenessRule = apps.get_model('businessrules', 'UniquenessRule')
UniquenessRuleField = apps.get_model('businessrules', 'UniquenessRuleField')

candidate_rules_with_field: tuple[int] = tuple(UniquenessRuleField.objects.filter(uniquenessrule__modelName__iexact='collectionobject', uniquenessrule__isDatabaseConstraint=True, fieldPath__iexact='catalognumber', isScope=False).values_list('uniquenessrule_id', flat=True))
def catnum_rule_uneditable(apps, schema_editor):
""" Find any CollectionObject catalogNumber must be unique to Collection
rules which are editable on the frontend (have isDatabaseConstraint=False)
and set their isDatabaseConstraint=True.

candidate_rules_with_scope: tuple[int] = tuple(UniquenessRuleField.objects.filter(uniquenessrule_id__in=candidate_rules_with_field, fieldPath__iexact='collection', isScope=True).values_list('uniquenessrule_id', flat=True))
Generally should be run when migration businessrules/0003 is being reverted
"""
Discipline = apps.get_model("specify", "Discipline")
UniquenessRule = apps.get_model("businessrules", "UniquenessRule")

candidate_rules = UniquenessRule.objects.filter(id__in=candidate_rules_with_scope)
candidate_rules.update(isDatabaseConstraint=False)
for discipline in Discipline.objects.all():
# REFACTOR: Some of these queries should be able to be combined to
# improve performance and limit how often we need to hit the database
model_rules = UniquenessRule.objects.filter(
modelName="Collectionobject",
discipline_id=discipline.id,
isDatabaseConstraint=False
)

def catnum_rule_uneditable(apps, schema_editor):
Discipline = apps.get_model('specify', 'Discipline')
UniquenessRule = apps.get_model('businessrules', 'UniquenessRule')
UniquenessRuleField = apps.get_model('businessrules', 'UniquenessRuleField')
has_catalognumber_rule = False
matching_rule_ids: list[int] = []
for rule in model_rules:
rule_fields = rule.uniquenessrulefield_set.all()

for discipline in Discipline.objects.all():
candidate_rules_with_field: tuple[int] = tuple(UniquenessRuleField.objects.filter(uniquenessrule__modelName__iexact='collectionobject', uniquenessrule__discipline=discipline.id, uniquenessrule__isDatabaseConstraint=False, fieldPath__iexact='catalognumber', isScope=False).values_list('uniquenessrule_id', flat=True))
fields = rule_fields.filter(isScope=False)
scopes = rule_fields.filter(isScope=True)

candidate_rules_with_scope: tuple[int] = tuple(UniquenessRuleField.objects.filter(uniquenessrule_id__in=candidate_rules_with_field, fieldPath__iexact='collection', isScope=True).values_list('uniquenessrule_id', flat=True))
# We're only interested in the rule "CollectionObject catalogNumber
# must be unique to Collection"
# We check for length of fields and scopes because get() raises an
# exception if more than one result is returned
if (len(fields) == 1 and len(scopes) == 1) and (fields.get().fieldPath.lower() == "catalognumber" and scopes.get().fieldPath.lower() == "collection"):
has_catalognumber_rule = True
matching_rule_ids.append(rule.id)

candidate_rules = UniquenessRule.objects.filter(id__in=candidate_rules_with_scope)
if len(candidate_rules) == 0:
if has_catalognumber_rule:
UniquenessRule.objects.filter(
id__in=matching_rule_ids).update(isDatabaseConstraint=True)
else:
create_uniqueness_rule(
model_name='Collectionobject',
model_name="Collectionobject",
discipline=discipline,
is_database_constraint=True,
fields=['catalogNumber'],
scopes=['collection'],
registry=apps
fields=["catalogNumber"],
scopes=["collection"],
registry=apps,
)
else:
candidate_rules.update(isDatabaseConstraint=True)


class Migration(migrations.Migration):
dependencies = [
('businessrules', '0003_catnum_constraint')
]

operations = [
migrations.RunPython(catnum_rule_editable, catnum_rule_uneditable, atomic=True)
migrations.RunPython(catnum_rule_editable,
catnum_rule_uneditable, atomic=True)
]
90 changes: 40 additions & 50 deletions specifyweb/backend/businessrules/uniqueness_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,58 +490,48 @@ def rule_is_global(scopes: Iterable[str]) -> bool:


def fix_global_default_rules(registry=None):
"""
Removes UniquenessRules that are scoped to Discipline that already exist
globally.

There were historically cases where UniquenessRules were incorrectly
created in two places: globally and scoped to a particular discipline.

See https://github.com/specify/specify7/pull/6308#issuecomment-3247556491
"""
UniquenessRule = registry.get_model('businessrules', 'UniquenessRule') \
if registry \
else models.UniquenessRule
UniquenessRuleField = registry.get_model('businessrules', 'UniquenessRuleField') \
if registry \
else models.UniquenessRuleField

global_rule_fields = UniquenessRuleField.objects.filter(
uniquenessrule__discipline__isnull=True
).values(
"uniquenessrule__modelName",
"uniquenessrule__isDatabaseConstraint",
"fieldPath",
"isScope",
)

global_rule_exists = UniquenessRule.objects.filter(
discipline__isnull=True,
modelName=OuterRef("modelName"),
isDatabaseConstraint=OuterRef("isDatabaseConstraint"),
)

discipline_ids = (
UniquenessRule.objects.exclude(discipline__isnull=True)
.values_list("discipline_id", flat=True)
.distinct()
)

for discipline_id in discipline_ids:
with transaction.atomic():
# Delete matching fields for this discipline
matching_fields_qs = UniquenessRuleField.objects.filter(
uniquenessrule__discipline_id=discipline_id
).filter(
Exists(
global_rule_fields.filter(
**{
"uniquenessrule__modelName": OuterRef("uniquenessrule__modelName"),
"uniquenessrule__isDatabaseConstraint": OuterRef("uniquenessrule__isDatabaseConstraint"),
"fieldPath": OuterRef("fieldPath"),
"isScope": OuterRef("isScope"),
}
)
)
)
matching_fields_qs.delete()

# Delete UniquenessRule rows for this discipline that are now empty
empty_rules_qs = (
UniquenessRule.objects.filter(discipline_id=discipline_id)
.annotate(field_count=Count("uniquenessrulefield"))
.filter(field_count=0) # now empty after field deletions
.filter(Exists(global_rule_exists))
global_rule_signatures = {
(
rule.modelName,
rule.isDatabaseConstraint,
frozenset(
(field.fieldPath, field.isScope)
for field in rule.uniquenessrulefield_set.all()
),
)
empty_rules_qs.delete()
for rule in UniquenessRule.objects.filter(
discipline__isnull=True
).prefetch_related("uniquenessrulefield_set")
}

with transaction.atomic():
# REFACTOR: See if we can simplify this even further. We should be able
# to collapse this query -> iteration -> check workflow to a single
# query.
# That would eliminate the N + 1 problem with this current approach,
# where every scoped rule needs to be evaluated.
Comment on lines +532 to +536
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(note for discussion)

I left this comment in the code, but I'll leave this as a comment on the PR for visibility.
There was a noticeable change in the asymptotic growth (time complexity) with this change.

Noticeably, the previous implementation of fix_global_default_rules:

  • Iterated over each distinct non-null DisciplineID
  • For each DisciplineID, performed a minimum of 2 queries to identify the "duplicate" scoped Uniqueness Rule and another 2 queries to delete them

Importantly, the number of Queries and iteration only grew on the number of Disciplines. So assuming the number of Disciplines remained the same, there could be any number of added Uniqueness Rules within those Disciplines without seeing a slowdown or additional database queries.
(This is only partly/theoretically true: there will be a slowdown for those queries if there is a sufficiently large amount of Uniqueness Rules and the queries are performed on unindexed fields or in a way that can't utilize the indexes, though I'm holding that slowdown as negligible or constant between the two approaches).

I've cleaned up the new approach and eliminated extra queries in 6e37e1e (this PR), but there still is a change in the approach:

  • Now we make a DB query to fetch all Uniqueness Rules that are scoped (have a non-null DisciplineID)
  • For each rule, we check whether there is an identical unscoped Uniqueness Rule
  • If there is an identified duplicate, we delete the Uniqueness Rule

Our "bottleneck" is now largely on the amount of scoped Uniqueness Rules, rather than the number of Disciplines.
Practically, I'm not sure how much of a slowdown this would introduce.
If we have the time, we can try and resolve this slowdown (I imagine we can use and filter on annotations on the query for scoped rules for duplicate rules, or just revert back to something like the past approach if there are no bugs), and/or we can evaluate how much of a slowdown this introduces.

for rule in UniquenessRule.objects.exclude(discipline__isnull=True).prefetch_related("uniquenessrulefield_set"):
signature = (
rule.modelName,
rule.isDatabaseConstraint,
frozenset(
(field.fieldPath, field.isScope)
for field in rule.uniquenessrulefield_set.all()
),
)
if signature in global_rule_signatures:
rule.uniquenessrulefield_set.all().delete()
rule.delete()
18 changes: 13 additions & 5 deletions specifyweb/backend/patches/migration_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,27 +10,35 @@ def apply_migrations(app_registry, schema_editor=None):
update_coordinates(app_registry, schema_editor)

def update_is_accepted(app_registry, schema_editor=None):
db_alias = schema_editor.connection.alias if schema_editor is not None else "default"

for tree in SPECIFY_TREES:
tree_filters = {
"isaccepted": False,
"accepted" + tree.lower() + "__isnull": True
}

tree_model = app_registry.get_model("specify", tree)
tree_model.objects.filter(**tree_filters).update(isaccepted=True)
tree_model.objects.using(db_alias).filter(**tree_filters).update(isaccepted=True)


def update_coordinates(app_registry, schema_editor=None):
db_alias = schema_editor.connection.alias if schema_editor is not None else "default"

Locality = app_registry.get_model("specify", "Locality")

Locality.objects.filter(lat1text__isnull=True, latitude1__isnull=False) \
Locality.objects.using(db_alias) \
.filter(lat1text__isnull=True, latitude1__isnull=False) \
.update(lat1text=F("latitude1"))

Locality.objects.filter(long1text__isnull=True, longitude1__isnull=False) \
Locality.objects.using(db_alias) \
.filter(long1text__isnull=True, longitude1__isnull=False) \
.update(long1text=F("longitude1"))

Locality.objects.filter(lat2text__isnull=True, latitude2__isnull=False) \
Locality.objects.using(db_alias) \
.filter(lat2text__isnull=True, latitude2__isnull=False) \
.update(lat2text=F("latitude2"))

Locality.objects.filter(long2text__isnull=True, longitude2__isnull=False) \
Locality.objects.using(db_alias) \
.filter(long2text__isnull=True, longitude2__isnull=False) \
.update(long2text=F("longitude2"))
Loading
Loading