Skip to content
Open
Show file tree
Hide file tree
Changes from 51 commits
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
2a5eda8
fix:reverse_hide_component_fields to be set to Falses
CarolineDenis May 29, 2026
e8466af
fix: Do not dedupe localized strings across all languages
CarolineDenis May 29, 2026
25bf8b2
fix: Simplify MultipleObjectsReturned
CarolineDenis May 29, 2026
063a7c6
Fix: Use db_alias for migration db connexion
CarolineDenis May 29, 2026
6cc5ffa
Fix: flips existing row back to isDatabaseConstraint=True
CarolineDenis May 29, 2026
1ee0f1e
@CarolineDenis
CarolineDenis May 29, 2026
f0dd766
Fix: verify existing user permissions
CarolineDenis May 29, 2026
6dfe4eb
Fix: Remove the use of f-string
CarolineDenis May 29, 2026
8893687
fix: prevent incorrect reuse when pairing tree defs and disciplines
CarolineDenis May 29, 2026
32e2972
fix: remove exclusion so partially migrated disciplines are repaired
CarolineDenis May 29, 2026
6d1403c
Fix: Update splocalecontainer items
CarolineDenis May 29, 2026
d60939d
Fix: Remove shadowing import in geo migration
CarolineDenis May 29, 2026
470d99d
Fix: Revert relative age migration instead of applying again
CarolineDenis May 29, 2026
befb73b
Fix fix order of revert migration in tectonic migration
CarolineDenis May 29, 2026
8a1e35b
Fix: Indentation
CarolineDenis May 29, 2026
82bb520
Fix: Use deterministic ordering before positional pairing
CarolineDenis May 29, 2026
8edb112
Fix: Log only on debug
CarolineDenis May 29, 2026
efc8228
Fix: Fix age type
CarolineDenis May 29, 2026
2366664
Fix: Improve logger
CarolineDenis May 29, 2026
4c0f9f8
Fix: Remove unecessary param in def migration
CarolineDenis May 29, 2026
fac87e9
Fix: Remove projects from legacy tests
CarolineDenis May 29, 2026
c8663e9
Fix: Change settings import
CarolineDenis May 29, 2026
a940612
Fix:Incomplete field initialization in conditional createt
CarolineDenis May 29, 2026
4e1d45a
Fix: Incomplete field initialization in conditional create
CarolineDenis May 29, 2026
e228051
Fix: Put model name in lower case for consistency and reversability
CarolineDenis May 29, 2026
aff97b8
Fix: Revert chnages
CarolineDenis May 29, 2026
1e13637
Fix: Remove double id rank
CarolineDenis May 29, 2026
624deee
Fix: Indentation
CarolineDenis May 29, 2026
cd740a7
Fix: Add strict=False to allow diff length
CarolineDenis May 29, 2026
05f6298
Fix: Remove duplicate
CarolineDenis May 29, 2026
cf79196
Fix: Remove using from geo migration
CarolineDenis May 29, 2026
c341f55
Fix: Bring back legacy project test
CarolineDenis May 29, 2026
72414b6
Fix: Remove values_list() in favor of .all
CarolineDenis May 29, 2026
7112c84
fix: stop shadowing catnum unique in collection function for migration
melton-jason Jun 2, 2026
eda65ab
refactor: improve readability of condition when enabling editable uni…
melton-jason Jun 2, 2026
6e37e1e
fix: simplify fix_global_rules helper function
melton-jason Jun 3, 2026
48face1
fix: use manager over base manager for tree patches
melton-jason Jun 3, 2026
fdfe21b
fix: remove check for old newly created DBs in specify 7
melton-jason Jun 3, 2026
96bf0cc
fix: bring back is_sp6_user_permissions_migrated
melton-jason Jun 3, 2026
cfad41f
fix: change ordering of admin checks to evaluate more common first
melton-jason Jun 3, 2026
2e1835e
fix: remove unused query
melton-jason Jun 3, 2026
f4d6491
refactor: use defaults in get_or_create for Roles
melton-jason Jun 3, 2026
f792d5e
chore: add refactor note
melton-jason Jun 3, 2026
91aaf38
refactor: move DEBUG check into log_sqlalchemy_query
melton-jason Jun 3, 2026
6b4d796
fix: remove unused log_sqlalchemy_query function
melton-jason Jun 3, 2026
ab8320b
fix: incorrect Discipline -> tectonicunittreedef pairing when resolvi…
melton-jason Jun 3, 2026
4196c1e
refactor: optimize fix_taxon_treedef_discipline_links to one query
melton-jason Jun 3, 2026
6e1902e
refactor: move fix_tectonic_links to TectonicUnit file
melton-jason Jun 3, 2026
39ec012
fix: forward tectonic unit migration functions
melton-jason Jun 4, 2026
9f9df79
fix: use Migrator user for all run_key_migration operations
melton-jason Jun 5, 2026
a95bc71
perf: reduce set_discipline_for_taxon_treedefs to single DB hit
melton-jason Jun 5, 2026
1136e52
refactor: collapse default tectonicunit ranks to tuple
melton-jason Jun 5, 2026
5418c64
fix: simplify and move reverse tectonic root node migration
melton-jason Jun 5, 2026
419d69b
fix: handle case when businessrule app is not ready but migrations ar…
melton-jason Jun 5, 2026
19f5c1a
refactor: move reverse migration to migration file
melton-jason Jun 5, 2026
9ddf13e
fix: account for custom Tectonic Trees when reverting migration
melton-jason Jun 5, 2026
3396fe4
fix: handle case when root node is synonymized in reverse migration
melton-jason Jun 8, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 11 additions & 51 deletions specifyweb/backend/businessrules/migration_utils.py
Original file line number Diff line number Diff line change
@@ -1,71 +1,31 @@
from typing import Tuple, List

from specifyweb.backend.businessrules.uniqueness_rules import create_uniqueness_rule


def catnum_rule_editable(apps, schema_editor=None):
""" Find any CollectionObject catalogNumber must be unique to Collection
rules which are readonly on the frontend (have isDatabaseConstraint=True)
and set their isDatabaseConstraint=False.

Generally should be run only after migration businessrules/0003 has been
applied
"""
UniquenessRule = apps.get_model("businessrules", "UniquenessRule")

model_rules = UniquenessRule.objects.filter(modelName="Collectionobject", isDatabaseConstraint=True)

catalog_number_rules: List[int] = []
for rule in model_rules:
model_rules = UniquenessRule.objects.filter(
modelName="Collectionobject",
isDatabaseConstraint=True
)

catalog_number_rules: list[int] = []
for rule in model_rules:
rule_fields = rule.uniquenessrulefield_set.all()

fields = rule_fields.filter(isScope=False)
scopes = rule_fields.filter(isScope=True)

# We're only interested in the rule "CollectionObject catalogNumber
# We're only interested in the rule "CollectionObject catalogNumber
# must be unique to Collection"
# We check for length of fields and scopes because get() raises an
# We check for length of fields and scopes because get() raises an
# exception if more than one result is returned
if (len(fields) == 1 and len(scopes) == 1) and (fields.get().fieldPath.lower() == "catalognumber" and scopes.get().fieldPath.lower() == "collection"):
catalog_number_rules.append(rule.id)

rules_to_update = UniquenessRule.objects.filter(id__in=catalog_number_rules)
rules_to_update.update(isDatabaseConstraint=False)


def catnum_rule_uneditable(apps, schema_editor=None):
""" Find any CollectionObject catalogNumber must be unique to Collection
rules which are editable on the frontend (have isDatabaseConstraint=False)
and set their isDatabaseConstraint=True.

Generally should be run when migration businessrules/0003 is being reverted
"""
Discipline = apps.get_model("specify", "Discipline")
UniquenessRule = apps.get_model("businessrules", "UniquenessRule")

for discipline in Discipline.objects.all():
model_rules = UniquenessRule.objects.filter(modelName="Collectionobject", discipline_id=discipline.id, isDatabaseConstraint=False)

has_catalognumber_rule = False
for rule in model_rules:
rule_fields = rule.uniquenessrulefield_set.all()

fields = rule_fields.filter(isScope=False)
scopes = rule_fields.filter(isScope=True)

# We're only interested in the rule "CollectionObject catalogNumber
# must be unique to Collection"
# We check for length of fields and scopes because get() raises an
# exception if more than one result is returned
if (len(fields) == 1 and len(scopes) == 1) and (fields.get().fieldPath.lower() == "catalognumber" and scopes.get().fieldPath.lower() == "collection"):
has_catalognumber_rule = True

if not has_catalognumber_rule:
create_uniqueness_rule(
model_name="Collectionobject",
discipline=discipline,
is_database_constraint=True,
fields=["catalogNumber"],
scopes=["collection"],
registry=apps,
)
Original file line number Diff line number Diff line change
@@ -1,48 +1,64 @@
from django.db import migrations

from specifyweb.backend.businessrules.migration_utils import catnum_rule_editable, catnum_rule_uneditable
from specifyweb.backend.businessrules.migration_utils import catnum_rule_editable
from specifyweb.backend.businessrules.uniqueness_rules import create_uniqueness_rule


def catnum_rule_editable(apps, schema_editor):
UniquenessRule = apps.get_model('businessrules', 'UniquenessRule')
UniquenessRuleField = apps.get_model('businessrules', 'UniquenessRuleField')

candidate_rules_with_field: tuple[int] = tuple(UniquenessRuleField.objects.filter(uniquenessrule__modelName__iexact='collectionobject', uniquenessrule__isDatabaseConstraint=True, fieldPath__iexact='catalognumber', isScope=False).values_list('uniquenessrule_id', flat=True))
def catnum_rule_uneditable(apps, schema_editor):
""" Find any CollectionObject catalogNumber must be unique to Collection
rules which are editable on the frontend (have isDatabaseConstraint=False)
and set their isDatabaseConstraint=True.

candidate_rules_with_scope: tuple[int] = tuple(UniquenessRuleField.objects.filter(uniquenessrule_id__in=candidate_rules_with_field, fieldPath__iexact='collection', isScope=True).values_list('uniquenessrule_id', flat=True))
Generally should be run when migration businessrules/0003 is being reverted
"""
Discipline = apps.get_model("specify", "Discipline")
UniquenessRule = apps.get_model("businessrules", "UniquenessRule")

candidate_rules = UniquenessRule.objects.filter(id__in=candidate_rules_with_scope)
candidate_rules.update(isDatabaseConstraint=False)
for discipline in Discipline.objects.all():
# REFACTOR: Some of these queries should be able to be combined to
# improve performance and limit how often we need to hit the database
model_rules = UniquenessRule.objects.filter(
modelName="Collectionobject",
discipline_id=discipline.id,
isDatabaseConstraint=False
)

def catnum_rule_uneditable(apps, schema_editor):
Discipline = apps.get_model('specify', 'Discipline')
UniquenessRule = apps.get_model('businessrules', 'UniquenessRule')
UniquenessRuleField = apps.get_model('businessrules', 'UniquenessRuleField')
has_catalognumber_rule = False
matching_rule_ids: list[int] = []
for rule in model_rules:
rule_fields = rule.uniquenessrulefield_set.all()

for discipline in Discipline.objects.all():
candidate_rules_with_field: tuple[int] = tuple(UniquenessRuleField.objects.filter(uniquenessrule__modelName__iexact='collectionobject', uniquenessrule__discipline=discipline.id, uniquenessrule__isDatabaseConstraint=False, fieldPath__iexact='catalognumber', isScope=False).values_list('uniquenessrule_id', flat=True))
fields = rule_fields.filter(isScope=False)
scopes = rule_fields.filter(isScope=True)

candidate_rules_with_scope: tuple[int] = tuple(UniquenessRuleField.objects.filter(uniquenessrule_id__in=candidate_rules_with_field, fieldPath__iexact='collection', isScope=True).values_list('uniquenessrule_id', flat=True))
# We're only interested in the rule "CollectionObject catalogNumber
# must be unique to Collection"
# We check for length of fields and scopes because get() raises an
# exception if more than one result is returned
if (len(fields) == 1 and len(scopes) == 1) and (fields.get().fieldPath.lower() == "catalognumber" and scopes.get().fieldPath.lower() == "collection"):
has_catalognumber_rule = True
matching_rule_ids.append(rule.id)

candidate_rules = UniquenessRule.objects.filter(id__in=candidate_rules_with_scope)
if len(candidate_rules) == 0:
if has_catalognumber_rule:
UniquenessRule.objects.filter(
id__in=matching_rule_ids).update(isDatabaseConstraint=True)
else:
create_uniqueness_rule(
model_name='Collectionobject',
model_name="Collectionobject",
discipline=discipline,
is_database_constraint=True,
fields=['catalogNumber'],
scopes=['collection'],
registry=apps
fields=["catalogNumber"],
scopes=["collection"],
registry=apps,
)
else:
candidate_rules.update(isDatabaseConstraint=True)


class Migration(migrations.Migration):
dependencies = [
('businessrules', '0003_catnum_constraint')
]

operations = [
migrations.RunPython(catnum_rule_editable, catnum_rule_uneditable, atomic=True)
migrations.RunPython(catnum_rule_editable,
catnum_rule_uneditable, atomic=True)
]
90 changes: 40 additions & 50 deletions specifyweb/backend/businessrules/uniqueness_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,58 +490,48 @@ def rule_is_global(scopes: Iterable[str]) -> bool:


def fix_global_default_rules(registry=None):
"""
Removes UniquenessRules that are scoped to Discipline that already exist
globally.

There were historically cases where UniquenessRules were incorrectly
created in two places: globally and scoped to a particular discipline.

See https://github.com/specify/specify7/pull/6308#issuecomment-3247556491
"""
UniquenessRule = registry.get_model('businessrules', 'UniquenessRule') \
if registry \
else models.UniquenessRule
UniquenessRuleField = registry.get_model('businessrules', 'UniquenessRuleField') \
if registry \
else models.UniquenessRuleField

global_rule_fields = UniquenessRuleField.objects.filter(
uniquenessrule__discipline__isnull=True
).values(
"uniquenessrule__modelName",
"uniquenessrule__isDatabaseConstraint",
"fieldPath",
"isScope",
)

global_rule_exists = UniquenessRule.objects.filter(
discipline__isnull=True,
modelName=OuterRef("modelName"),
isDatabaseConstraint=OuterRef("isDatabaseConstraint"),
)

discipline_ids = (
UniquenessRule.objects.exclude(discipline__isnull=True)
.values_list("discipline_id", flat=True)
.distinct()
)

for discipline_id in discipline_ids:
with transaction.atomic():
# Delete matching fields for this discipline
matching_fields_qs = UniquenessRuleField.objects.filter(
uniquenessrule__discipline_id=discipline_id
).filter(
Exists(
global_rule_fields.filter(
**{
"uniquenessrule__modelName": OuterRef("uniquenessrule__modelName"),
"uniquenessrule__isDatabaseConstraint": OuterRef("uniquenessrule__isDatabaseConstraint"),
"fieldPath": OuterRef("fieldPath"),
"isScope": OuterRef("isScope"),
}
)
)
)
matching_fields_qs.delete()

# Delete UniquenessRule rows for this discipline that are now empty
empty_rules_qs = (
UniquenessRule.objects.filter(discipline_id=discipline_id)
.annotate(field_count=Count("uniquenessrulefield"))
.filter(field_count=0) # now empty after field deletions
.filter(Exists(global_rule_exists))
global_rule_signatures = {
(
rule.modelName,
rule.isDatabaseConstraint,
frozenset(
(field.fieldPath, field.isScope)
for field in rule.uniquenessrulefield_set.all()
),
)
empty_rules_qs.delete()
for rule in UniquenessRule.objects.filter(
discipline__isnull=True
).prefetch_related("uniquenessrulefield_set")
}

with transaction.atomic():
# REFACTOR: See if we can simplify this even further. We should be able
# to collapse this query -> iteration -> check workflow to a single
# query.
# That would eliminate the N + 1 problem with this current approach,
# where every scoped rule needs to be evaluated.
Comment on lines +532 to +536
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(note for discussion)

I left this comment in the code, but I'll leave this as a comment on the PR for visibility.
There was a noticeable change in the asymptotic growth (time complexity) with this change.

Noticeably, the previous implementation of fix_global_default_rules:

  • Iterated over each distinct non-null DisciplineID
  • For each DisciplineID, performed a minimum of 2 queries to identify the "duplicate" scoped Uniqueness Rule and another 2 queries to delete them

Importantly, the number of Queries and iteration only grew on the number of Disciplines. So assuming the number of Disciplines remained the same, there could be any number of added Uniqueness Rules within those Disciplines without seeing a slowdown or additional database queries.
(This is only partly/theoretically true: there will be a slowdown for those queries if there is a sufficiently large amount of Uniqueness Rules and the queries are performed on unindexed fields or in a way that can't utilize the indexes, though I'm holding that slowdown as negligible or constant between the two approaches).

I've cleaned up the new approach and eliminated extra queries in 6e37e1e (this PR), but there still is a change in the approach:

  • Now we make a DB query to fetch all Uniqueness Rules that are scoped (have a non-null DisciplineID)
  • For each rule, we check whether there is an identical unscoped Uniqueness Rule
  • If there is an identified duplicate, we delete the Uniqueness Rule

Our "bottleneck" is now largely on the amount of scoped Uniqueness Rules, rather than the number of Disciplines.
Practically, I'm not sure how much of a slowdown this would introduce.
If we have the time, we can try and resolve this slowdown (I imagine we can use and filter on annotations on the query for scoped rules for duplicate rules, or just revert back to something like the past approach if there are no bugs), and/or we can evaluate how much of a slowdown this introduces.

for rule in UniquenessRule.objects.exclude(discipline__isnull=True).prefetch_related("uniquenessrulefield_set"):
signature = (
rule.modelName,
rule.isDatabaseConstraint,
frozenset(
(field.fieldPath, field.isScope)
for field in rule.uniquenessrulefield_set.all()
),
)
if signature in global_rule_signatures:
rule.uniquenessrulefield_set.all().delete()
rule.delete()
60 changes: 32 additions & 28 deletions specifyweb/backend/permissions/initialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,28 @@ def create_admins(apps=apps) -> None:
UserPolicy = apps.get_model('permissions', 'UserPolicy')
Specifyuser = apps.get_model('specify', 'Specifyuser')

if UserPolicy.objects.filter(collection__isnull=True, resource='%', action='%').exists():
# don't do anything if there is already any admin.
return

users = Specifyuser.objects.all()
for user in users:
if is_sp6_user_permissions_migrated(user, apps):
# REFACTOR: Try and fold the following checks into a single query to
# avoid making multiple queries per user.
# Ideally, we only make a single query to fetch all users that:
# - Are not already Institution Admins
# - Have not already seen activity in Sp 7 (don't have Sp7 permissions)
# - (The Institution Admin permission could have been intentionally
# removed)
# - Are admins in Sp 6

# The ordering here for checks here is intentional: it's more likely a
# user has Sp 7 permissions than being an admin, so we do the former
# check first
if is_sp6_user_permissions_migrated(user=user, apps=apps):
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(note here on why I brought back is_sp6_user_permissions_migrated)

I brought back is_sp6_user_permissions_migrated here.
It was needed for the following workflow:

  • A user is an admin in Specify 6, and should not be an admin in Specify 7
  • In this scenario, the user's Specify 7 Institution Admin privileges have been explicitly revoked

Previously, because this is a part of Run Key Migration Functions, the user in question would be re-made into an Institutional Admin, even if they were explicitly removed as an admin.

With is_sp6_user_permissions_migrated, we assume that users that don't have Sp 7 permissions can't have been intentionally removed as Specify 7 Institution Admins, so we can safely migrate their Admin preference. Otherwise, we can't tell if the user was intentionally removed as an admin

continue
if UserPolicy.objects.filter(
collection__isnull=True,
specifyuser_id=user.id,
resource="%",
action="%",
).exists():
continue
if is_legacy_admin(user):
UserPolicy.objects.get_or_create(
Expand Down Expand Up @@ -92,14 +107,6 @@ def assign_users_to_roles(apps=apps) -> None:

results = []
with connection.cursor() as cursor:
cursor.execute("""
SELECT COUNT(*)
FROM information_schema.tables
WHERE table_name IN ('specifyuser_spprincipal', 'spuserrole')
AND table_schema = DATABASE();
""")
if cursor.fetchone()[0] < 2:
return # Newly created sp7 databases don't have these sp6 specific tables.
cursor.execute("""
SELECT
u.SpecifyUserID as user_id,
Expand All @@ -112,37 +119,34 @@ def assign_users_to_roles(apps=apps) -> None:
JOIN spprincipal p ON p.SpPrincipalID = up.SpPrincipalID
JOIN collection c ON c.UserGroupScopeId = p.userGroupScopeID
WHERE p.groupType IS NULL
AND u.SpecifyUserID NOT IN (
SELECT ur.specifyuser_id
AND NOT EXISTS (
SELECT 1
FROM spuserrole ur
JOIN sprole r ON r.id = ur.role_id
WHERE r.collection_id = p.usergroupscopeid
)
AND c.UserGroupScopeId NOT IN (
SELECT DISTINCT r.collection_id
FROM spuserrole ur
JOIN sprole r ON r.id = ur.role_id
JOIN collection c ON c.UserGroupScopeId = r.collection_id
WHERE r.collection_id = c.UserGroupScopeId
AND ur.specifyuser_id = u.SpecifyUserID
);
""")

results = cursor.fetchall()

for user_id, user_name, user_type, collection_id, collection_name in results:
if user_type not in {'Manager', 'FullAccess', 'LimitedAccess', 'Guest'}:
# REFACTOR: If we want to exlcude all other roles, why don't we write
# the exlcusion in the query rather than evaluate in Python?
if user_type not in ROLE_NAMES.keys():
continue

role_name = ROLE_NAMES.get(user_type, f"{user_type} - {collection_name}")
role_description = ROLE_DESCRIPTIONS.get(user_type, "No description available.")
logger.info(f"Assigned user {user_name} to role {role_name} for collection {collection_name}.")

role, is_new_role = Role.objects.get_or_create(
role, _ = Role.objects.get_or_create(
collection_id=collection_id,
name=role_name
name=role_name,
defaults={
"description": role_description
}
)
if is_new_role:
role.description = role_description
role.save()
UserRole.objects.get_or_create(
specifyuser_id=user_id,
role=role
Expand Down
5 changes: 2 additions & 3 deletions specifyweb/backend/stored_queries/execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -856,9 +856,8 @@ def execute(
if limit:
query = query.limit(limit)

log_sqlalchemy_query(query)


log_sqlalchemy_query(query) # Debugging
return {"results": apply_special_post_query_processing(query, tableid, field_specs, collection, user)}

def build_query(
Expand Down Expand Up @@ -1065,7 +1064,7 @@ def series_post_query(query, limit=40, offset=0, sort_type=0, co_id_cat_num_pair
and adding a co_id colum and formatted catnum range column.
Sort the results by the first catnum in the range."""

log_sqlalchemy_query(query) # Debugging
log_sqlalchemy_query(query)

def parse_catalog_for_comparing(s):
def check_for_decimal(s):
Expand Down
Loading
Loading