Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions app/forms/concerns/hyrax/redirects_field_behavior.rb
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,9 @@ def deserialize!(params)

# Builds plain hashes (the persisted shape) from the submitted
# `redirects_attributes` payload. Drops rows marked for destruction
# or with a blank path. Normalizes paths to the canonical form
# stored in the redirects table.
# or with a blank path. Normalizes paths up-front so the validator
# sees canonical form (so DSpace-style pasted URLs validate cleanly).
# or with a blank path.
def redirects_attributes_populator(fragment:, **_options)
return unless respond_to?(:redirects)
return unless Hyrax.config.redirects_active?
Expand Down
3 changes: 2 additions & 1 deletion app/indexers/hyrax/indexers/redirects_indexer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@ def to_solr(*args)
next document unless Hyrax.config.redirects_active?
next document unless resource.respond_to?(:redirects)
# Valkyrie's JSONValueMapper symbolizes hash keys on read; accept either.
# Paths are normalized at write time by Hyrax::RedirectsNormalization.
document['redirects_path_ssim'] = Array(resource.redirects)
.map { |entry| Hyrax::RedirectPathNormalizer.call(entry['path'] || entry[:path]) }
.map { |entry| entry['path'] || entry[:path] }
.reject(&:blank?)
Comment thread
laritakr marked this conversation as resolved.
end
end
Expand Down
38 changes: 38 additions & 0 deletions app/models/concerns/hyrax/redirects_normalization.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# frozen_string_literal: true

module Hyrax
# Normalizes redirect path entries on assignment so every write path —
# form submissions, console writes, importers, change-set saves —
# produces canonical data. Read-side consumers can trust the persisted
# shape and skip their own normalization.
#
# Included next to Hyrax::Schema(:redirects) on Hyrax::Work and
# Hyrax::PcdmCollection. The override sits on Valkyrie's set_value
# primitive so it fires under both flex modes (the simple schema's
# generated setter and the m3 singleton-class setter both route through
# set_value).
#
# See documentation/redirects.md.
module RedirectsNormalization
extend ActiveSupport::Concern

def set_value(key, value)
value = normalize_redirects(value) if key.to_sym == :redirects
super
end

private

def normalize_redirects(value)
Array(value).map { |entry| normalize_entry(entry) }
end

def normalize_entry(entry)
return entry unless entry.is_a?(Hash)
normalized = entry.dup
path_key = normalized.key?(:path) ? :path : 'path'
normalized[path_key] = Hyrax::RedirectPathNormalizer.call(normalized[path_key])
normalized
end
end
end
1 change: 1 addition & 0 deletions app/models/hyrax/pcdm_collection.rb
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ class PcdmCollection < Hyrax::Resource
include Hyrax::Schema(:core_metadata) if Hyrax.config.collection_include_metadata?
# See documentation/redirects.md for the redirects feature.
include Hyrax::Schema(:redirects) if Hyrax.config.collection_include_metadata? && Hyrax.config.redirects_enabled?
include Hyrax::RedirectsNormalization if Hyrax.config.redirects_enabled?

attribute :collection_type_gid, Valkyrie::Types::String
attribute :member_ids, Valkyrie::Types::Array.of(Valkyrie::Types::ID).meta(ordered: true)
Expand Down
1 change: 1 addition & 0 deletions app/models/hyrax/work.rb
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ class Work < Hyrax::Resource
include Hyrax::Schema(:core_metadata) if Hyrax.config.work_default_metadata
# See documentation/redirects.md for the redirects feature.
include Hyrax::Schema(:redirects) if Hyrax.config.work_default_metadata && Hyrax.config.redirects_enabled?
include Hyrax::RedirectsNormalization if Hyrax.config.redirects_enabled?

attribute :admin_set_id, Valkyrie::Types::ID
attribute :member_ids, Valkyrie::Types::Array.of(Valkyrie::Types::ID).meta(ordered: true)
Expand Down
13 changes: 7 additions & 6 deletions documentation/redirects.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,21 +124,22 @@ When the config is off, the loader filters `redirects.yaml` out of the schema se

## Path normalization

Every redirect path in Hyrax — whether typed into a form, looked up by the resolver, written to the redirects table, or queried by the validator — passes through `Hyrax::RedirectPathNormalizer.call`. This is the single source of truth for "what does this path look like on disk?". Normalization rules:
Every redirect path in Hyrax passes through `Hyrax::RedirectPathNormalizer.call`. This is the single source of truth for "what does this path look like on disk?". Normalization rules:

1. If the input parses as a URL with a scheme and host (e.g. `https://old.example.edu/handle/12345/678`), keep only the path component.
2. Strip query strings (`?utm_source=foo`) and fragments (`#section`).
3. Ensure a leading slash (`handle/123` → `/handle/123`).
4. Strip trailing slashes (`/handle/123/` → `/handle/123`), with the exception that the bare path `/` is preserved.

The normalizer is idempotent — `normalize(normalize(x)) == normalize(x)` — and is wired in at four call sites so they all agree on the canonical form:
The normalizer is idempotent — `normalize(normalize(x)) == normalize(x)`. Normalization happens in two distinct contexts:

- `Hyrax::RedirectsFieldBehavior#redirects_attributes_populator` normalizes each entry's path on form submission, before validation. The normalized form is what the validator sees and what the resource persists.
**On write (resource layer).** `Hyrax::RedirectsNormalization` is included on `Hyrax::Work` and `Hyrax::PcdmCollection` and overrides `set_value` so any assignment to the `redirects` attribute — form save, console write, importer, change-set apply — normalizes each entry's path before persistence. Read-side consumers (`Hyrax::Indexers::RedirectsIndexer`, `Hyrax::Transactions::Steps::SyncRedirectPaths`) trust the persisted shape and do not re-normalize.

**On input (boundary layer).** Three boundary points canonicalize input from outside the resource before consulting the persisted state:

- `Hyrax::RedirectsFieldBehavior#redirects_attributes_populator` normalizes form entries before the form-level validator runs, so a user pasting a full URL (`https://old.example.edu/handle/123?utm=email`) is forgivingly accepted and the validator sees the canonical form.
- `Hyrax::RedirectsController#show` (the resolver) normalizes the incoming request path before the Solr lookup, so `/foo/` and `/foo` both resolve.
- `Hyrax::RedirectsLookup` normalizes its input on construction, so callers can pass any reasonable form.
- `Hyrax::Transactions::Steps::SyncRedirectPaths` normalizes paths before writing to the redirects table, as defense in depth.

A user pasting a full URL from an old DSpace page (`https://old.example.edu/handle/123?utm=email`) sees the form quietly accept and persist `/handle/123`.

## Validation

Expand Down
3 changes: 2 additions & 1 deletion lib/hyrax/transactions/steps/sync_redirect_paths.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,9 @@ def syncable?(object)

def build_rows(object)
# Valkyrie's JSONValueMapper symbolizes hash keys on read; accept either.
# Paths are normalized at write time by Hyrax::RedirectsNormalization.
paths = Array(object.redirects)
.map { |entry| Hyrax::RedirectPathNormalizer.call(entry['path'] || entry[:path]) }
.map { |entry| entry['path'] || entry[:path] }
.reject(&:blank?)
Comment thread
laritakr marked this conversation as resolved.
.uniq
now = Time.current
Expand Down
86 changes: 86 additions & 0 deletions spec/models/concerns/hyrax/redirects_normalization_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# frozen_string_literal: true

RSpec.describe Hyrax::RedirectsNormalization do
let(:resource_class) do
Class.new(Valkyrie::Resource) do
attribute :redirects, Valkyrie::Types::Array.of(Valkyrie::Types::Hash)
include Hyrax::RedirectsNormalization
end
end

let(:resource) { resource_class.new }

describe 'normalization on write' do
it 'strips trailing slashes' do
resource.redirects = [{ 'path' => '/foo/' }]
expect(resource.redirects.first['path']).to eq('/foo')
end

it 'adds a leading slash' do
resource.redirects = [{ 'path' => 'handle/123' }]
expect(resource.redirects.first['path']).to eq('/handle/123')
end

it 'extracts path from a full URL with query string' do
resource.redirects = [{ 'path' => 'https://old.example.edu/handle/123?utm=email' }]
expect(resource.redirects.first['path']).to eq('/handle/123')
end

it 'accepts symbol keys' do
resource.redirects = [{ path: '/foo/' }]
expect(resource.redirects.first[:path]).to eq('/foo')
end

it 'preserves non-path keys on the entry' do
resource.redirects = [{ 'path' => '/foo/', 'canonical' => true, 'sequence' => 0 }]
entry = resource.redirects.first
expect(entry['path']).to eq('/foo')
expect(entry['canonical']).to be true
expect(entry['sequence']).to eq(0)
end

it 'normalizes every entry in a multi-entry array' do
resource.redirects = [{ 'path' => '/foo/' }, { 'path' => 'bar' }]
expect(resource.redirects.map { |e| e['path'] }).to eq(['/foo', '/bar'])
end

it 'is idempotent — re-assigning normalized values is a no-op' do
resource.redirects = [{ 'path' => '/foo/' }]
first = resource.redirects
resource.redirects = first
expect(resource.redirects).to eq(first)
end

it 'leaves non-redirects attributes untouched' do
other_class = Class.new(Valkyrie::Resource) do
attribute :title, Valkyrie::Types::String
attribute :redirects, Valkyrie::Types::Array.of(Valkyrie::Types::Hash)
include Hyrax::RedirectsNormalization
end
r = other_class.new(title: 'A title that ends with /')
expect(r.title).to eq('A title that ends with /')
end
end

describe 'flex-mode behavior (m3 singleton-class setter)' do
let(:flex_class) do
Class.new(Valkyrie::Resource) do
include Hyrax::Flexibility
attribute :redirects, Valkyrie::Types::Array.of(Valkyrie::Types::Hash)
include Hyrax::RedirectsNormalization
end
end

it 'fires under the flex singleton-class setter path' do
# Mirrors how Hyrax::Flexibility.load attaches the schema to the
# singleton class before set_value is called.
resource = flex_class.allocate
resource.send(:initialize, {})
resource.singleton_class.attributes(redirects: Valkyrie::Types::Array.of(Valkyrie::Types::Hash))

resource.set_value(:redirects, [{ 'path' => '/foo/' }])

expect(resource.redirects.first['path']).to eq('/foo')
end
end
end
Loading