Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions app/forms/concerns/hyrax/redirects_field_behavior.rb
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ def deserialize!(params)

# Builds plain hashes (the persisted shape) from the submitted
# `redirects_attributes` payload. Drops rows marked for destruction
# or with a blank path. Normalizes paths to the canonical form
# stored in the uniqueness ledger.
# or with a blank path. Normalizes paths up-front so the validator
# sees canonical form (so DSpace-style pasted URLs validate cleanly).
def redirects_attributes_populator(fragment:, **_options)
return unless respond_to?(:redirects)
return unless Hyrax.config.redirects_active?
Expand Down
3 changes: 2 additions & 1 deletion app/indexers/hyrax/indexers/redirects_indexer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@ def to_solr(*args)
next document unless Hyrax.config.redirects_active?
next document unless resource.respond_to?(:redirects)
# Valkyrie's JSONValueMapper symbolizes hash keys on read; accept either.
# Paths are normalized at write time by Hyrax::RedirectsNormalization.
document['redirects_path_ssim'] = Array(resource.redirects)
.map { |entry| Hyrax::RedirectPathNormalizer.call(entry['path'] || entry[:path]) }
.map { |entry| entry['path'] || entry[:path] }
.reject(&:blank?)
Comment thread
laritakr marked this conversation as resolved.
end
end
Expand Down
38 changes: 38 additions & 0 deletions app/models/concerns/hyrax/redirects_normalization.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# frozen_string_literal: true

module Hyrax
# Normalizes redirect path entries on assignment so every write path —
# form submissions, console writes, importers, change-set saves —
# produces canonical data. Read-side consumers can trust the persisted
# shape and skip their own normalization.
#
# Included next to Hyrax::Schema(:redirects) on Hyrax::Work and
# Hyrax::PcdmCollection. The override sits on Valkyrie's set_value
# primitive so it fires under both flex modes (the simple schema's
# generated setter and the m3 singleton-class setter both route through
# set_value).
#
# See documentation/redirects.md.
module RedirectsNormalization
extend ActiveSupport::Concern

def set_value(key, value)
value = normalize_redirects(value) if key.to_sym == :redirects
super
end

private

def normalize_redirects(value)
Array(value).map { |entry| normalize_entry(entry) }
end

def normalize_entry(entry)
return entry unless entry.is_a?(Hash)
normalized = entry.dup
path_key = normalized.key?(:path) ? :path : 'path'
normalized[path_key] = Hyrax::RedirectPathNormalizer.call(normalized[path_key])
normalized
end
end
end
1 change: 1 addition & 0 deletions app/models/hyrax/pcdm_collection.rb
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ class PcdmCollection < Hyrax::Resource
include Hyrax::Schema(:core_metadata) if Hyrax.config.collection_include_metadata?
# See documentation/redirects.md for the redirects feature.
include Hyrax::Schema(:redirects) if Hyrax.config.collection_include_metadata? && Hyrax.config.redirects_enabled?
include Hyrax::RedirectsNormalization if Hyrax.config.redirects_enabled?

attribute :collection_type_gid, Valkyrie::Types::String
attribute :member_ids, Valkyrie::Types::Array.of(Valkyrie::Types::ID).meta(ordered: true)
Expand Down
2 changes: 1 addition & 1 deletion app/models/hyrax/redirect_path.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# frozen_string_literal: true

module Hyrax
# ActiveRecord-backed uniqueness ledger for redirect paths.
# ActiveRecord-backed redirects table for global path uniqueness.
#
# Maintained by Hyrax::Transactions::Steps::SyncRedirectPaths and
# Hyrax::Transactions::Steps::RemoveRedirectPaths; queried by
Expand Down
1 change: 1 addition & 0 deletions app/models/hyrax/work.rb
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ class Work < Hyrax::Resource
include Hyrax::Schema(:core_metadata) if Hyrax.config.work_default_metadata
# See documentation/redirects.md for the redirects feature.
include Hyrax::Schema(:redirects) if Hyrax.config.work_default_metadata && Hyrax.config.redirects_enabled?
include Hyrax::RedirectsNormalization if Hyrax.config.redirects_enabled?

attribute :admin_set_id, Valkyrie::Types::ID
attribute :member_ids, Valkyrie::Types::Array.of(Valkyrie::Types::ID).meta(ordered: true)
Expand Down
29 changes: 15 additions & 14 deletions documentation/redirects.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,21 +124,22 @@ When the config is off, the loader filters `redirects.yaml` out of the schema se

## Path normalization

Every redirect path in Hyrax — whether typed into a form, looked up by the resolver, written to the uniqueness ledger, or queried by the validator — passes through `Hyrax::RedirectPathNormalizer.call`. This is the single source of truth for "what does this path look like on disk?". Normalization rules:
Every redirect path in Hyrax passes through `Hyrax::RedirectPathNormalizer.call`. This is the single source of truth for "what does this path look like on disk?". Normalization rules:

1. If the input parses as a URL with a scheme and host (e.g. `https://old.example.edu/handle/12345/678`), keep only the path component.
2. Strip query strings (`?utm_source=foo`) and fragments (`#section`).
3. Ensure a leading slash (`handle/123` → `/handle/123`).
4. Strip trailing slashes (`/handle/123/` → `/handle/123`), with the exception that the bare path `/` is preserved.

The normalizer is idempotent — `normalize(normalize(x)) == normalize(x)` — and is wired in at four call sites so they all agree on the canonical form:
The normalizer is idempotent — `normalize(normalize(x)) == normalize(x)`. Normalization happens in two distinct contexts:

- `Hyrax::RedirectsFieldBehavior#redirects_attributes_populator` normalizes each entry's path on form submission, before validation. The normalized form is what the validator sees and what the resource persists.
**On write (resource layer).** `Hyrax::RedirectsNormalization` is included on `Hyrax::Work` and `Hyrax::PcdmCollection` and overrides `set_value` so any assignment to the `redirects` attribute — form save, console write, importer, change-set apply — normalizes each entry's path before persistence. Read-side consumers (`Hyrax::Indexers::RedirectsIndexer`, `Hyrax::Transactions::Steps::SyncRedirectPaths`) trust the persisted shape and do not re-normalize.

**On input (boundary layer).** Three boundary points canonicalize input from outside the resource before consulting the persisted state:

- `Hyrax::RedirectsFieldBehavior#redirects_attributes_populator` normalizes form entries before the form-level validator runs, so a user pasting a full URL (`https://old.example.edu/handle/123?utm=email`) is forgivingly accepted and the validator sees the canonical form.
- `Hyrax::RedirectsController#show` (the resolver) normalizes the incoming request path before the Solr lookup, so `/foo/` and `/foo` both resolve.
- `Hyrax::RedirectsLookup` normalizes its input on construction, so callers can pass any reasonable form.
- `Hyrax::Transactions::Steps::SyncRedirectPaths` normalizes paths before writing to the ledger, as defense in depth.

A user pasting a full URL from an old DSpace page (`https://old.example.edu/handle/123?utm=email`) sees the form quietly accept and persist `/handle/123`.

## Validation

Expand Down Expand Up @@ -172,9 +173,9 @@ end

The validator rejects any redirect path that equals one of these prefixes, or starts with one followed by `/` (so `/admin` is reserved, and `/admin/anything` is reserved, but `/administrator` would *not* be reserved by the `/admin` entry).

### Uniqueness lookup and the `hyrax_redirect_paths` ledger
### Uniqueness lookup and the `hyrax_redirect_paths` table

Global uniqueness is enforced by a Postgres table, `hyrax_redirect_paths`, which has a unique B-tree index on `path`. The table is a derived ledger of every redirect path currently in use, and the unique index gives the hard guarantee that no two records can share a path even under concurrent saves. A second non-unique index on `resource_id` supports the per-resource sync described below.
Global uniqueness is enforced by a Postgres table, `hyrax_redirect_paths`, which has a unique B-tree index on `path`. The table is a derived record of every redirect path currently in use, and the unique index gives the hard guarantee that no two records can share a path even under concurrent saves. A second non-unique index on `resource_id` supports the per-resource sync described below.

`Hyrax::RedirectsLookup` is the single point of truth for "is this path taken?". It queries the table:

Expand All @@ -184,12 +185,12 @@ SELECT 1 FROM hyrax_redirect_paths WHERE path = ? AND resource_id <> ? LIMIT 1;

The validator calls `Hyrax::RedirectsLookup.taken?(path, except_id: record.id)` to give the user friendly feedback at form-submit time. If two simultaneous requests both pass validation (because both checked the table before either committed), the unique index rejects the second one at insert time and the enclosing transaction returns `Failure`.

### Sync between `redirects` attribute and the ledger
### Sync between `redirects` attribute and the redirects table

The ledger is kept in sync by two `dry-transaction` steps composed into the create/update/destroy transactions:
The redirects table is kept in sync by two `dry-transaction` steps composed into the create/update/destroy transactions:

- `Hyrax::Transactions::Steps::SyncRedirectPaths` — runs after the resource is saved (in `WorkCreate`, `WorkUpdate`, `CollectionCreate`, `CollectionUpdate`). Deletes the resource's existing rows and reinserts the current redirect set in a single DB transaction. On `ActiveRecord::RecordNotUnique` (race lost), returns `Failure([:redirect_path_collision, ...])`, which short-circuits the enclosing transaction and surfaces back to the controller. No-op when either the config or the Flipflop is off — there's no point writing ledger rows when the feature isn't actively in use.
- `Hyrax::Transactions::Steps::RemoveRedirectPaths` — runs before `delete_resource` in `WorkDestroy` and `CollectionDestroy`. Clears the resource's rows so deleted resources don't leave dangling claims on redirect paths. Gated only on the config (not the Flipflop): cleanup must happen regardless of whether the feature is currently in active use, so that an admin toggling the Flipflop off mid-deployment doesn't leave orphaned rows that could later collide with new resources after a re-enable.
- `Hyrax::Transactions::Steps::SyncRedirectPaths` — runs after the resource is saved (in `WorkCreate`, `WorkUpdate`, `CollectionCreate`, `CollectionUpdate`). Deletes the resource's existing rows and reinserts the current redirect set in a single DB transaction. On `ActiveRecord::RecordNotUnique` (race lost), returns `Failure([:redirect_path_collision, ...])`, which short-circuits the enclosing transaction and surfaces back to the controller. On any other `ActiveRecord::StatementInvalid` (missing table, schema drift, connection drop), logs the error and returns `Failure([:redirect_path_sync_error, ...])` rather than raising — the resource is already persisted by the time this step runs, so a soft-fail is preferable to a 500 with partial state. No-op when either the config or the Flipflop is off.
- `Hyrax::Transactions::Steps::RemoveRedirectPaths` — runs before `delete_resource` in `WorkDestroy` and `CollectionDestroy`. Clears the resource's rows so deleted resources don't leave dangling claims on redirect paths. Same `StatementInvalid` treatment as the sync step: returns `Failure([:redirect_path_remove_error, ...])` instead of raising. Gated only on the config (not the Flipflop): cleanup must happen regardless of whether the feature is currently in active use, so that an admin toggling the Flipflop off mid-deployment doesn't leave orphaned rows that could later collide with new resources after a re-enable.

Neither step does anything when the config is off, so adopters who don't enable the redirects feature pay no cost for them.

Expand Down Expand Up @@ -287,5 +288,5 @@ Alternatively, gate the inclusion of the calling code itself on `Hyrax.config.re
- `Hyrax::RedirectValidator` (`app/validators/hyrax/redirect_validator.rb`) — the form-level entry validator.
- `Hyrax::RedirectPathNormalizer` (`app/services/hyrax/redirect_path_normalizer.rb`) — canonical-form normalization for redirect paths.
- `Hyrax::RedirectsLookup` (`app/services/hyrax/redirects_lookup.rb`) — the uniqueness lookup against `hyrax_redirect_paths`.
- `Hyrax::RedirectPath` (`app/models/hyrax/redirect_path.rb`) — ActiveRecord model for the `hyrax_redirect_paths` ledger.
- `Hyrax::Transactions::Steps::SyncRedirectPaths` and `Hyrax::Transactions::Steps::RemoveRedirectPaths` (`lib/hyrax/transactions/steps/`) — the transaction steps that keep the ledger in sync with each resource's `redirects` attribute.
- `Hyrax::RedirectPath` (`app/models/hyrax/redirect_path.rb`) — ActiveRecord model for the `hyrax_redirect_paths` table.
- `Hyrax::Transactions::Steps::SyncRedirectPaths` and `Hyrax::Transactions::Steps::RemoveRedirectPaths` (`lib/hyrax/transactions/steps/`) — the transaction steps that keep the redirects table in sync with each resource's `redirects` attribute.
8 changes: 6 additions & 2 deletions lib/hyrax/transactions/steps/remove_redirect_paths.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ module Hyrax
module Transactions
module Steps
# A `dry-transaction` step that removes a resource's rows from the
# `hyrax_redirect_paths` uniqueness ledger. Runs as part of the destroy
# `hyrax_redirect_paths` redirects table. Runs as part of the destroy
# transactions so deleted resources don't leave dangling claims on
# redirect paths.
#
Expand All @@ -16,8 +16,12 @@ class RemoveRedirectPaths
# @param [Valkyrie::Resource] resource
# @return [Dry::Monads::Result]
def call(resource)
Hyrax::RedirectPath.where(resource_id: resource.id.to_s).delete_all if removable?(resource)
return Success(resource) unless removable?(resource)
Hyrax::RedirectPath.where(resource_id: resource.id.to_s).delete_all
Success(resource)
rescue ActiveRecord::StatementInvalid => e
Hyrax.logger.error("[redirects] remove_redirect_paths failed: #{e.message}")
Failure([:redirect_path_remove_error, e.message])
end

private
Expand Down
8 changes: 6 additions & 2 deletions lib/hyrax/transactions/steps/sync_redirect_paths.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ module Hyrax
module Transactions
module Steps
# A `dry-transaction` step that mirrors a saved resource's `redirects`
# entries into the `hyrax_redirect_paths` uniqueness ledger. The unique
# entries into the `hyrax_redirect_paths` redirects table. The unique
# index on `path` enforces global uniqueness at the DB level — if a
# concurrent save already claimed a path, the insert raises
# ActiveRecord::RecordNotUnique and this step returns Failure, which
Expand All @@ -26,6 +26,9 @@ def call(object)
Success(object)
rescue ActiveRecord::RecordNotUnique => e
Failure([:redirect_path_collision, e.message])
rescue ActiveRecord::StatementInvalid => e
Hyrax.logger.error("[redirects] sync_redirect_paths failed: #{e.message}")
Failure([:redirect_path_sync_error, e.message])
end

private
Expand All @@ -37,8 +40,9 @@ def syncable?(object)

def build_rows(object)
# Valkyrie's JSONValueMapper symbolizes hash keys on read; accept either.
# Paths are normalized at write time by Hyrax::RedirectsNormalization.
paths = Array(object.redirects)
.map { |entry| Hyrax::RedirectPathNormalizer.call(entry['path'] || entry[:path]) }
.map { |entry| entry['path'] || entry[:path] }
.reject(&:blank?)
Comment thread
laritakr marked this conversation as resolved.
.uniq
now = Time.current
Expand Down
15 changes: 15 additions & 0 deletions spec/lib/hyrax/transactions/steps/remove_redirect_paths_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,20 @@
expect(step.call(resource)).to be_success
end
end

context 'when the underlying redirects table query raises StatementInvalid' do
before do
allow(Hyrax::RedirectPath).to receive(:where)
.and_raise(ActiveRecord::StatementInvalid, 'PG::UndefinedTable: relation "hyrax_redirect_paths" does not exist')
allow(Hyrax.logger).to receive(:error)
end

it 'returns Failure with a redirect_path_remove_error tag and logs the error' do
result = step.call(resource)
expect(result).to be_failure
expect(result.failure.first).to eq(:redirect_path_remove_error)
expect(Hyrax.logger).to have_received(:error).with(/remove_redirect_paths failed/)
end
end
end
end
15 changes: 15 additions & 0 deletions spec/lib/hyrax/transactions/steps/sync_redirect_paths_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,21 @@
end
end

context 'when the underlying redirects table query raises StatementInvalid' do
before do
allow(Hyrax::RedirectPath).to receive(:where)
.and_raise(ActiveRecord::StatementInvalid, 'PG::UndefinedTable: relation "hyrax_redirect_paths" does not exist')
allow(Hyrax.logger).to receive(:error)
end

it 'returns Failure with a redirect_path_sync_error tag and logs the error' do
result = step.call(resource)
expect(result).to be_failure
expect(result.failure.first).to eq(:redirect_path_sync_error)
expect(Hyrax.logger).to have_received(:error).with(/sync_redirect_paths failed/)
end
end

context 'when the redirects feature is inactive' do
before { allow(Hyrax.config).to receive(:redirects_active?).and_return(false) }

Expand Down
86 changes: 86 additions & 0 deletions spec/models/concerns/hyrax/redirects_normalization_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# frozen_string_literal: true

RSpec.describe Hyrax::RedirectsNormalization do
let(:resource_class) do
Class.new(Valkyrie::Resource) do
attribute :redirects, Valkyrie::Types::Array.of(Valkyrie::Types::Hash)
include Hyrax::RedirectsNormalization
end
end

let(:resource) { resource_class.new }

describe 'normalization on write' do
it 'strips trailing slashes' do
resource.redirects = [{ 'path' => '/foo/' }]
expect(resource.redirects.first['path']).to eq('/foo')
end

it 'adds a leading slash' do
resource.redirects = [{ 'path' => 'handle/123' }]
expect(resource.redirects.first['path']).to eq('/handle/123')
end

it 'extracts path from a full URL with query string' do
resource.redirects = [{ 'path' => 'https://old.example.edu/handle/123?utm=email' }]
expect(resource.redirects.first['path']).to eq('/handle/123')
end

it 'accepts symbol keys' do
resource.redirects = [{ path: '/foo/' }]
expect(resource.redirects.first[:path]).to eq('/foo')
end

it 'preserves non-path keys on the entry' do
resource.redirects = [{ 'path' => '/foo/', 'canonical' => true, 'sequence' => 0 }]
entry = resource.redirects.first
expect(entry['path']).to eq('/foo')
expect(entry['canonical']).to be true
expect(entry['sequence']).to eq(0)
end

it 'normalizes every entry in a multi-entry array' do
resource.redirects = [{ 'path' => '/foo/' }, { 'path' => 'bar' }]
expect(resource.redirects.map { |e| e['path'] }).to eq(['/foo', '/bar'])
end

it 'is idempotent — re-assigning normalized values is a no-op' do
resource.redirects = [{ 'path' => '/foo/' }]
first = resource.redirects
resource.redirects = first
expect(resource.redirects).to eq(first)
end

it 'leaves non-redirects attributes untouched' do
other_class = Class.new(Valkyrie::Resource) do
attribute :title, Valkyrie::Types::String
attribute :redirects, Valkyrie::Types::Array.of(Valkyrie::Types::Hash)
include Hyrax::RedirectsNormalization
end
r = other_class.new(title: 'A title that ends with /')
expect(r.title).to eq('A title that ends with /')
end
end

describe 'flex-mode behavior (m3 singleton-class setter)' do
let(:flex_class) do
Class.new(Valkyrie::Resource) do
include Hyrax::Flexibility
attribute :redirects, Valkyrie::Types::Array.of(Valkyrie::Types::Hash)
include Hyrax::RedirectsNormalization
end
end

it 'fires under the flex singleton-class setter path' do
# Mirrors how Hyrax::Flexibility.load attaches the schema to the
# singleton class before set_value is called.
resource = flex_class.allocate
resource.send(:initialize, {})
resource.singleton_class.attributes(redirects: Valkyrie::Types::Array.of(Valkyrie::Types::Hash))

resource.set_value(:redirects, [{ 'path' => '/foo/' }])

expect(resource.redirects.first['path']).to eq('/foo')
end
end
end
Loading