# frozen_string_literal: true
module Hyrax
  ##
  # Validates that controlled vocabulary properties contain only active
  # terms from their corresponding local QA authority.
  #
  # Properties are matched to authorities dynamically via
  # +Qa::Authorities::Local.subauthorities+, using the singularized
  # authority name to match property names on the change set (the
  # "licenses" authority validates the +license+ property, and so on).
  #
  # Only covers local authorities (file-based and table-based).
  # Remote authorities (e.g. Geonames) are out of scope.
  #
  # The validator does nothing unless the Flipflop feature
  # +validate_local_controlled_vocabulary+ is switched on; that feature is
  # declared in +config/features.rb+ with +default: false+.
  #
  # @example registration on the base change set
  #   class ChangeSet < Valkyrie::ChangeSet
  #     validates_with Hyrax::ControlledVocabularyValidator
  #   end
  class ControlledVocabularyValidator < ActiveModel::Validator
    ##
    # Adds one error per distinct unrecognized value to +record.errors+,
    # keyed by the offending property.
    #
    # @param record [#fields, #errors] the change set under validation; it
    #   must also respond to a reader for every key reported by +fields+
    #   that matches a local authority
    # @return [void]
    def validate(record)
      return unless Flipflop.validate_local_controlled_vocabulary?

      active_terms_by_property(record).each do |property, terms|
        unrecognized_values(record, property, terms).each do |value|
          # The attribute name is prepended when full messages are built, so
          # the message must not repeat it (otherwise users would read
          # "License License contains unrecognized value: ...").
          record.errors.add(property, "contains unrecognized value: #{value}")
        end
      end
    end

    private

    ##
    # Non-blank values of +property+ that are not among the active +terms+.
    # Repeated values are reported once so the same message is not added
    # several times.
    #
    # @param record [Object] the change set under validation
    # @param property [String] name of the reader to call on +record+
    # @param terms [Array] ids of the active terms for this property
    # @return [Array] the distinct offending values, in input order
    def unrecognized_values(record, property, terms)
      Array.wrap(record.public_send(property))
           .reject(&:blank?)
           .uniq
           .reject { |value| terms.include?(value) }
    end

    ##
    # Maps every property of +record+ that has a matching local authority to
    # the ids of that authority's active terms. Authorities that return no
    # terms at all are left out, so their properties are not validated.
    #
    # @example
    #   # { "license" => ["http://creativecommons.org/licenses/by/4.0/", ...],
    #   #   "resource_type" => ["Article", "Book", ...] }
    #
    # @param record [#fields] the change set under validation
    # @return [Hash{String => Array}]
    def active_terms_by_property(record)
      # { "license" => "licenses", "resource_type" => "resource_types", ... }
      authority_for = Qa::Authorities::Local.subauthorities.to_h { |name| [name.singularize, name] }
      properties = record.fields.keys.map(&:to_s)

      (properties & authority_for.keys).each_with_object({}) do |property, result|
        terms = Qa::Authorities::Local.subauthority_for(authority_for[property]).all
        next if terms.empty?

        result[property] = terms.filter_map { |term| term[:id] if active?(term) }
      end
    end

    ##
    # Terms without an +active+ field (e.g. resource_types) are
    # treated as active, matching QA's +FileBasedAuthority#all+ behavior.
    # Only an explicit +false+ deactivates a term, which is why the value is
    # compared against +false+ rather than tested for truthiness.
    #
    # @param term [Hash] a single term as returned by the authority's +all+
    # @return [Boolean]
    def active?(term)
      term.fetch(:active, true) != false
    end
  end
end
# frozen_string_literal: true

RSpec.describe Hyrax::ControlledVocabularyValidator do
  subject(:validator) { described_class.new }

  let(:resource) { build(:hyrax_work) }
  let(:change_set) { Hyrax::ChangeSet.for(resource) }

  # Two active licenses and one that is explicitly inactive.
  let(:license_terms) do
    [
      { id: 'http://creativecommons.org/licenses/by/4.0/', term: 'Attribution 4.0', active: true },
      { id: 'http://creativecommons.org/licenses/by-sa/4.0/', term: 'Attribution-ShareAlike 4.0', active: true },
      { id: 'http://creativecommons.org/licenses/by/3.0/us/', term: 'Attribution 3.0 United States', active: false }
    ].map(&:with_indifferent_access)
  end

  # Resource types carry no +active+ key at all.
  let(:resource_type_terms) do
    [
      { id: 'Article', term: 'Article' },
      { id: 'Book', term: 'Book' }
    ].map(&:with_indifferent_access)
  end

  let(:license_authority) { FakeAuthority.new(license_terms) }
  let(:resource_type_authority) { FakeAuthority.new(resource_type_terms) }

  before do
    allow(Flipflop).to receive(:validate_local_controlled_vocabulary?).and_return(true)
    allow(Qa::Authorities::Local).to receive(:subauthorities).and_return(%w[licenses resource_types])

    { 'licenses' => license_authority, 'resource_types' => resource_type_authority }.each do |name, authority|
      allow(Qa::Authorities::Local).to receive(:subauthority_for).with(name).and_return(authority)
    end
  end

  # Makes +target+ report exactly the given properties: +fields+ returns each
  # name mapped to an empty array, and the reader of the same name returns
  # the supplied values.
  def stub_properties(target, values)
    allow(target).to receive(:fields).and_return(values.keys.to_h { |name| [name, []] })
    values.each { |name, value| allow(target).to receive(name.to_sym).and_return(value) }
  end

  describe '#validate' do
    context 'when the change set has no controlled vocabulary fields' do
      it 'adds no errors' do
        validator.validate(change_set)

        expect(change_set.errors).to be_empty
      end
    end

    context 'when fields have valid terms' do
      before do
        stub_properties(change_set,
                        'license' => ['http://creativecommons.org/licenses/by/4.0/'],
                        'resource_type' => ['Article'])
      end

      it 'adds no errors' do
        validator.validate(change_set)

        expect(change_set.errors).to be_empty
      end
    end

    context 'when fields have blank values' do
      before { stub_properties(change_set, 'license' => ['', nil]) }

      it 'adds no errors' do
        validator.validate(change_set)

        expect(change_set.errors).to be_empty
      end
    end

    context 'when an authority has no terms' do
      let(:license_authority) { FakeAuthority.new([]) }

      before { stub_properties(change_set, 'license' => ['anything']) }

      it 'adds no errors' do
        validator.validate(change_set)

        expect(change_set.errors).to be_empty
      end
    end

    context 'when a field has an invalid term' do
      before { stub_properties(change_set, 'license' => ['http://bogus.example.com/fake']) }

      it 'adds errors to the change set' do
        validator.validate(change_set)

        expect(change_set.errors[:license]).to be_present
        expect(change_set.errors.full_messages).to include(match(/license.*unrecognized.*bogus/i))
      end
    end

    context 'when multiple fields have invalid terms' do
      before do
        stub_properties(change_set,
                        'license' => ['bad-license'],
                        'resource_type' => ['NotARealType'])
      end

      it 'adds errors for all invalid fields' do
        validator.validate(change_set)

        expect(change_set.errors[:license]).to be_present
        expect(change_set.errors[:resource_type]).to be_present
      end
    end

    context 'when a field has an inactive term' do
      before { stub_properties(change_set, 'license' => ['http://creativecommons.org/licenses/by/3.0/us/']) }

      it 'adds errors for the inactive term' do
        validator.validate(change_set)

        expect(change_set.errors.full_messages).to include(match(/license.*unrecognized.*by\/3.0\/us/i))
      end
    end

    context 'when a term has no active field' do
      before { stub_properties(change_set, 'resource_type' => ['Article']) }

      it 'treats it as active and adds no errors' do
        validator.validate(change_set)

        expect(change_set.errors).to be_empty
      end
    end

    context 'when a field has a mix of valid and invalid terms' do
      before do
        stub_properties(change_set,
                        'license' => ['http://creativecommons.org/licenses/by/4.0/',
                                      'http://bogus.example.com/fake'])
      end

      it 'adds errors only for invalid terms' do
        validator.validate(change_set)

        messages = change_set.errors.full_messages
        expect(messages).to include(match(/license.*unrecognized.*bogus/i))
        expect(messages).not_to include(match(/by\/4.0/i))
      end
    end
  end

  context 'when the feature is disabled' do
    # Overrides the enabling stub installed by the outer +before+.
    before { allow(Flipflop).to receive(:validate_local_controlled_vocabulary?).and_return(false) }

    it 'skips validation' do
      stub_properties(change_set, 'license' => ['http://bogus.example.com/fake'])

      validator.validate(change_set)

      expect(change_set.errors).to be_empty
    end
  end

  describe 'ChangeSet integration' do
    it 'is registered on Hyrax::ChangeSet and inherited by subclasses' do
      form = Hyrax::Forms::ResourceForm.new(resource: resource)
      stub_properties(form, 'license' => ['http://bogus.example.com/fake'])

      expect(form.valid?).to be false
      expect(form.errors[:license]).to be_present
    end
  end
end