From c61d4d7f130f4a57e372e507f643400926177889 Mon Sep 17 00:00:00 2001 From: philippe tcheriatinsky <philippe.tcherniatinsky@inrae.fr> Date: Wed, 19 Jun 2024 10:30:50 +0200 Subject: [PATCH] Correction de la clef naturelle. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - si la clef naturelle contient un champ vide, il sera remplacé par NULL_KEY par exemple a__NULL_KEY__b - dans le cas d'un composant récursif, si le parent est null alors il sera tout simplement omis de la clef ex a parent de b -> a__b; null parent de a -> a (NULL_KEY__ est omis) --- .../application/configuration/Ltree.java | 1 + .../domain/data/deposit/DataImporter.java | 9 ++- .../domain/data/read/UniquenessBuilder.java | 78 ------------------- .../domain/data/read/UniquenessKeys.java | 65 ---------------- 4 files changed, 7 insertions(+), 146 deletions(-) delete mode 100644 src/main/java/fr/inra/oresing/domain/data/read/UniquenessBuilder.java delete mode 100644 src/main/java/fr/inra/oresing/domain/data/read/UniquenessKeys.java diff --git a/src/main/java/fr/inra/oresing/domain/application/configuration/Ltree.java b/src/main/java/fr/inra/oresing/domain/application/configuration/Ltree.java index bc868b981..0d893db0b 100644 --- a/src/main/java/fr/inra/oresing/domain/application/configuration/Ltree.java +++ b/src/main/java/fr/inra/oresing/domain/application/configuration/Ltree.java @@ -27,6 +27,7 @@ public class Ltree { * Déliminateur entre les différents niveaux d'un ltree postgresql. */ public static final String SEPARATOR = "."; + public static final String NULL_KEY = "NULL_KEY"; private static final Pattern LABEL_INVALID_CHARACTERS_REGEX = Pattern.compile("[^a-zA-Z0-9_]"); private static final Pattern VALID_LABEL_REGEX = Pattern.compile("[a-zA-Z0-9_]+"); private static final Ltree EMPTY_LTREE_SINGLETON = new Ltree(""); diff --git a/src/main/java/fr/inra/oresing/domain/data/deposit/DataImporter.java b/src/main/java/fr/inra/oresing/domain/data/deposit/DataImporter.java index 8df974463..3dab4db76 100644 --- a/src/main/java/fr/inra/oresing/domain/data/deposit/DataImporter.java +++ b/src/main/java/fr/inra/oresing/domain/data/deposit/DataImporter.java @@ -577,14 +577,15 @@ public class DataImporter { .map(lineChecker -> column) ) .toList(); + Function<String, String> nullOrEmptyToNull = partialKey-> Strings.isNullOrEmpty(partialKey)?Ltree.NULL_KEY:partialKey; Function<DataColumn, String> toEscapedValueFromColumnRegardingColumnIsReferenceType = dataColumn -> getEscapedValueFromColumnRegardingColumnIsReferenceType(dataColumn, referenceDatumAfterChecking.referenceDatumAfterChecking()); String naturalKey = dataImporterContext().getNaturalKeyColumns().stream() .map(DataColumn::new) .map(toEscapedValueFromColumnRegardingColumnIsReferenceType) - .filter(Predicate.not(Strings::isNullOrEmpty)) + .map(nullOrEmptyToNull) .collect(Collectors.joining(DataImporterContext.COMPOSITE_NATURAL_KEY_COMPONENTS_SEPARATOR)); Preconditions.checkState(!naturalKey.isEmpty(), ExceptionMessage.NULL_NATURAL_KEY.toMessage(), referenceDatumAfterChecking.lineNumber(), String.join(" - ", dataImporterContext().getNaturalKeyColumnsImportHeaders())); - return Ltree.fromSql(naturalKey); + return Ltree.fromSql(naturalKey.replaceAll("^%s__".formatted(Ltree.NULL_KEY), "")); } String getEscapedValueFromColumnRegardingColumnIsReferenceType(DataColumn dataColumn, DataDatum referenceDatum) { @@ -800,6 +801,8 @@ public class DataImporter { @Override public Ltree computeNaturalKey(ReferenceDatumAfterChecking referenceDatumAfterChecking) { + Function<String, String> nullOrEmptyToNull = partialKey-> Strings.isNullOrEmpty(partialKey)?Ltree.NULL_KEY:partialKey; + final String naturalKeyAsString = dataImporterContext.getKeyColumns().stream() .map(referenceColumn -> { final DataColumnValue referenceColumnValue = referenceDatumAfterChecking.referenceDatumAfterChecking.get(referenceColumn); @@ -809,7 +812,7 @@ public class DataImporter { .map(DataColumnSingleValue.class::cast) .map(DataColumnSingleValue::getValue) .map(Object::toString) - .filter(StringUtils::isNotEmpty) + .map(nullOrEmptyToNull) .map(label -> label.matches(DateType.PATTERN_DATE_REGEXP_FIND_DATE) ? DateType.sorteableDateToFormattedDate(label).replaceAll("/", "_") : label) .map(Ltree::escapeToLabel) .collect(Collectors.joining(DataImporterContext.getCompositeNaturalKeyComponentsSeparator())); diff --git a/src/main/java/fr/inra/oresing/domain/data/read/UniquenessBuilder.java b/src/main/java/fr/inra/oresing/domain/data/read/UniquenessBuilder.java deleted file mode 100644 index 5818b8689..000000000 --- a/src/main/java/fr/inra/oresing/domain/data/read/UniquenessBuilder.java +++ /dev/null @@ -1,78 +0,0 @@ -package fr.inra.oresing.domain.data.read; - -import com.google.common.collect.ImmutableMap; -import fr.inra.oresing.domain.application.Application; -import fr.inra.oresing.domain.application.configuration.StandardDataDescription; -import fr.inra.oresing.domain.checker.type.FieldType; -import fr.inra.oresing.domain.data.deposit.validation.CsvRowValidationCheckResult; -import fr.inra.oresing.domain.data.deposit.validation.DefaultValidationCheckResult; -import fr.inra.oresing.domain.data.Datum; -import fr.inra.oresing.domain.exceptions.ReportErrors; - -import java.util.*; - -public class UniquenessBuilder { - final LinkedHashSet<String> uniquenessDescription; - final Map<UniquenessKeys, List<Integer>> uniquenessInFile = new TreeMap<>(); - private final String dataType; - - public UniquenessBuilder(final Application application, final String dataType) { - super(); - uniquenessDescription = getUniquenessDescription(application, dataType); - this.dataType = dataType; - } - - private LinkedHashSet<String> getUniquenessDescription(final Application application, final String dataName) { - return application.findData(dataName) - .map(StandardDataDescription::naturalKey) - .orElse(application - .findData(dataName) - .map(StandardDataDescription::componentDescriptions) - .map(Map::keySet) - .map(LinkedHashSet::new) - .orElseGet(LinkedHashSet::new) - ); - } - - public List<FieldType> test(final Datum datum, final int lineNumber) { - final UniquenessKeys uniquenessKeys = new UniquenessKeys(datum, uniquenessDescription); - uniquenessInFile - .compute(uniquenessKeys, (k, v) -> v == null ? new LinkedList<>() : v) - .add(lineNumber); - final boolean isInError = uniquenessInFile.get(uniquenessKeys).size() > 1; - return isInError ? null : uniquenessKeys.getValues(); - } - - private CsvRowValidationCheckResult getErrorForEntry(final Map.Entry<UniquenessKeys, List<Integer>> entry) { - return new CsvRowValidationCheckResult(DefaultValidationCheckResult.error("duplicatedLineInDatatype", - ImmutableMap.of("file", - dataType, - "duplicatedRows", - entry.getValue(), - "uniquenessKey", - getUniquenessKey(entry.getKey())) - , null), - entry.getValue().get(0) - ); - } - - public ReportErrors getErrors(final ReportErrors reportErrors) { - final List<CsvRowValidationCheckResult> errors = uniquenessInFile.entrySet().stream() - .filter(entry -> entry.getValue().size() > 1) - .map(this::getErrorForEntry) - .toList(); - reportErrors.addAll(errors); - return reportErrors; - } - - public Map<String, FieldType> getUniquenessKey(final UniquenessKeys uniquenessKeys) { - final Map<String, FieldType> uniquenessKeyMap = new HashMap<>(); - Iterator<String> iterator = uniquenessDescription.iterator(); - int counter = 0; - while (iterator.hasNext()) { - uniquenessKeyMap.put(iterator.next(), uniquenessKeys.getValues().get(counter)); - counter++; - } - return uniquenessKeyMap; - } -} diff --git a/src/main/java/fr/inra/oresing/domain/data/read/UniquenessKeys.java b/src/main/java/fr/inra/oresing/domain/data/read/UniquenessKeys.java deleted file mode 100644 index 4d060ee38..000000000 --- a/src/main/java/fr/inra/oresing/domain/data/read/UniquenessKeys.java +++ /dev/null @@ -1,65 +0,0 @@ -package fr.inra.oresing.domain.data.read; - -import fr.inra.oresing.domain.checker.type.FieldType; -import fr.inra.oresing.domain.checker.type.StringType; -import fr.inra.oresing.domain.data.Datum; - -import java.util.*; -import java.util.stream.Collectors; - -public class UniquenessKeys implements Comparable<UniquenessKeys> { - public List<FieldType> getValues() { - return values; - } - - List<FieldType> values = new LinkedList<>(); - final LinkedHashSet<String> uniquenessDescription; - - public UniquenessKeys(final Datum datum, final LinkedHashSet<String> uniquenessDescription) { - super(); - this.uniquenessDescription = uniquenessDescription; - values = uniquenessDescription.stream() - .map(componentKey -> Optional.ofNullable(datum) - .map(datum1 -> datum1.get(componentKey)) - .orElse(StringType.getStringTypeFromStringValue(""))) - .map(FieldType::copy) - .collect(Collectors.toList()); - } - - public String getKey() { - return values.stream().map(Object::toString).collect(Collectors.joining()); - } - - @Override - public boolean equals(final Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - final UniquenessKeys that = (UniquenessKeys) o; - return Objects.equals(getKey(), that.getKey()); - } - - @Override - public int hashCode() { - return Objects.hash(values); - } - - @Override - public int compareTo(final UniquenessKeys uniquenessKeys) { - int compare = -1; - for (int i = 0; i < values.size(); i++) { - compare = compare(i, uniquenessKeys); - if (compare != 0) { - return compare; - } - } - return compare; - } - - int compare(final int i, final UniquenessKeys otherUniqueness) { - return getKey(values.get(i)).compareTo(getKey(otherUniqueness.values.get(i))); - } - - static String getKey(final FieldType fieldType) { - return fieldType.getClass().getSimpleName() + ":" + fieldType; - } -} -- GitLab