Commit e46dff40 authored by marceteau
Browse files

refactoring dc + ajout archive.xml au paquet + mapping divers seda1

parent 2314e415
......@@ -23,9 +23,11 @@ package fr.cines.pac.converter;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
......@@ -34,23 +36,22 @@ import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.temporal.ChronoField;
import java.time.temporal.TemporalAdjusters;
import java.util.AbstractMap;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.UUID;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import org.apache.commons.compress.archivers.zip.PKWareExtraHeader.HashAlgorithm;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.tika.language.detect.LanguageDetector;
import org.apache.tika.language.detect.LanguageResult;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.ctc.wstx.shaded.msv_core.verifier.jarv.Const;
import fr.gouv.culture.archivesdefrance.seda.v2.EventLogBookOgType;
import fr.gouv.culture.archivesdefrance.seda.v2.LogBookOgType;
import fr.gouv.culture.archivesdefrance.seda.v2.LogBookType;
import fr.gouv.culture.archivesdefrance.seda.v2.ObjectFactory;
/**
......@@ -58,6 +59,8 @@ import fr.gouv.culture.archivesdefrance.seda.v2.ObjectFactory;
*
*/
public abstract class AbstractArchiveConverter implements ManifestConverter {
public static Logger logger = LoggerFactory.getLogger(AbstractArchiveConverter.class);
/**
* positionner à true si on ne traite que le bordereau
*/
......@@ -73,6 +76,9 @@ public abstract class AbstractArchiveConverter implements ManifestConverter {
public static final String DUBLIN_CORE_NAMESPACE = "http://purl.org/dc/elements/1.1/";
protected static final String DUPLICATE_ERROR_LANGUAGE = "E0500";
static final String INGEST_EVENT_TYPE_CODE = "INGEST";
static final String INGEST_EVENT_DETAIL_MESSAGE = "Reprise de l’événement d’archivage issu du SAE PAC";
public static String SIMPLE_DATE_FORMAT = "[0-9]{4}-[0-9]{2}-[0-9]{2}";
public static String SIMPLE_DATE_FORMAT_NO_TIRET = "[0-9]{8}";
......@@ -375,5 +381,44 @@ public abstract class AbstractArchiveConverter implements ManifestConverter {
/**
 * Stores the given value in the {@code sipType} field.
 *
 * @param sipType the SIP type to store
 */
public void setSipType(String sipType) {
this.sipType = sipType;
}
/**
 * Computes the checksum of a file and returns it as an upper-case hexadecimal string.
 * <p>
 * Supported algorithm names (case-insensitive): {@code MD5},
 * {@code SHA-256}/{@code SHA256} and {@code SHA-512}/{@code SHA512}.
 * Failures are logged and reported to the caller as {@code null}.
 *
 * @param fileName path of the file to hash
 * @param type     name of the digest algorithm
 * @return the upper-case hexadecimal digest, or {@code null} when the algorithm is
 *         missing or unsupported, or when the file cannot be read
 */
protected String computeChecksum(String fileName, String type) {
    if (type == null) {
        // Avoid a NullPointerException on a missing algorithm name.
        logger.error("type de checksum non supporté {}", type);
        return null;
    }
    // Locale.ROOT keeps the normalisation independent of the JVM default locale.
    String algorithm = type.toUpperCase(Locale.ROOT);
    try (InputStream data = new FileInputStream(fileName)) {
        switch (algorithm) {
            case "MD5":
                return DigestUtils.md5Hex(data).toUpperCase(Locale.ROOT);
            case "SHA-512":
            case "SHA512":
                return DigestUtils.sha512Hex(data).toUpperCase(Locale.ROOT);
            case "SHA-256":
            case "SHA256":
                return DigestUtils.sha256Hex(data).toUpperCase(Locale.ROOT);
            default:
                logger.error("type de checksum non supporté {}", algorithm);
                return null;
        }
    } catch (IOException e) {
        // FileNotFoundException is an IOException: one handler covers both cases,
        // and passing the exception keeps the cause in the log.
        logger.error("Impossible d'accéder au fichier {}", fileName, e);
        return null;
    }
}
/**
 * Builds a LogBook entry tracing the archiving event in Arcsys.
 * <p>
 * The returned logbook contains exactly one event, identified by a random UUID,
 * with an {@code OK} outcome.
 *
 * @param eventDate value set as the event date-time
 * @return a {@link LogBookOgType} holding the single ingest event
 */
protected LogBookOgType mapLogBook(String eventDate) {
    LogBookOgType result = objectFactory.createLogBookOgType();
    EventLogBookOgType ingestEvent = objectFactory.createEventLogBookOgType();

    // Identification of the event
    String eventId = UUID.randomUUID().toString();
    ingestEvent.setEventIdentifier(eventId);
    ingestEvent.setEventTypeCode(INGEST_EVENT_TYPE_CODE);
    ingestEvent.setEventType(LOGBOOK_EVENT_EVENT_TYPE);
    ingestEvent.setEventDateTime(eventDate);

    // Outcome of the event
    ingestEvent.setOutcome("OK");
    ingestEvent.setOutcomeDetail("LFC.ARCHIVAGE_PAC.OK");
    ingestEvent.setOutcomeDetailMessage(INGEST_EVENT_DETAIL_MESSAGE);

    result.getEvent().add(ingestEvent);
    return result;
}
}
......@@ -37,6 +37,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.math.BigInteger;
import java.net.UnknownHostException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.DateFormat;
......@@ -54,7 +55,6 @@ import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.UUID;
import java.util.stream.Collectors;
import javax.xml.bind.JAXBContext;
......@@ -74,7 +74,6 @@ import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.TransformerFactoryConfigurationError;
import javax.xml.transform.sax.SAXSource;
import org.apache.commons.codec.digest.DigestUtils;
import org.purl.dc.elements._1.SimpleLiteral;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -118,15 +117,12 @@ import fr.gouv.culture.archivesdefrance.seda.v2.DataObjectRefType;
import fr.gouv.culture.archivesdefrance.seda.v2.DescriptiveMetadataContentType;
import fr.gouv.culture.archivesdefrance.seda.v2.DescriptiveMetadataType;
import fr.gouv.culture.archivesdefrance.seda.v2.DisseminationRuleType;
import fr.gouv.culture.archivesdefrance.seda.v2.EventLogBookOgType;
import fr.gouv.culture.archivesdefrance.seda.v2.EventType;
import fr.gouv.culture.archivesdefrance.seda.v2.FileInfoType;
import fr.gouv.culture.archivesdefrance.seda.v2.FinalActionAppraisalCodeType;
import fr.gouv.culture.archivesdefrance.seda.v2.FormatIdentificationType;
import fr.gouv.culture.archivesdefrance.seda.v2.IdentifierType;
import fr.gouv.culture.archivesdefrance.seda.v2.LevelType;
import fr.gouv.culture.archivesdefrance.seda.v2.LogBookOgType;
import fr.gouv.culture.archivesdefrance.seda.v2.LogBookType;
import fr.gouv.culture.archivesdefrance.seda.v2.ManagementMetadataType;
import fr.gouv.culture.archivesdefrance.seda.v2.ManagementType;
import fr.gouv.culture.archivesdefrance.seda.v2.MessageDigestBinaryObjectType;
......@@ -240,12 +236,12 @@ public class DCToSeda21Converter extends AbstractArchiveConverter {
private static final String PREFIXE_ARK = "prefixeARK";
//Relation est un cas particulier pour la BSU
private static final String RELATION = "relation";
//Cas particulier pour certains SV
private static final String PUBLISHER = "publisher";
/**
* Label présent dans des balises, à ne pas mapper en Tag
*/
private static final Object UN_FILLED = "Non renseigné";
private static final String INGEST_EVENT_TYPE_CODE = "INGEST";
private static final String INGEST_EVENT_DETAIL_MESSAGE = "Reprise de l’événement d’archivage issu du SAE PAC";
private static final String DOCUMENT_TYPE = "DocumentType";
private static Validator validator;
......@@ -929,27 +925,6 @@ public class DCToSeda21Converter extends AbstractArchiveConverter {
sipfichmeta.setTailleEnOctets(BigInteger.valueOf(sipPath.toFile().length()));
sip.getFichMeta().add(sipfichmeta);
}
/**
 * Generates a LogBook entry used to trace the archiving event in Arcsys.
 *
 * @param eventDate date-time recorded on the generated event
 * @return a {@link LogBookOgType} containing one event with an {@code OK} outcome
 */
private LogBookOgType mapLogBook(String eventDate) {
    final LogBookOgType journal = objectFactory.createLogBookOgType();
    final EventLogBookOgType entry = objectFactory.createEventLogBookOgType();

    // Each generated event gets its own random identifier.
    final String identifier = UUID.randomUUID().toString();
    entry.setEventIdentifier(identifier);
    entry.setEventTypeCode(INGEST_EVENT_TYPE_CODE);
    entry.setEventType(LOGBOOK_EVENT_EVENT_TYPE);
    entry.setEventDateTime(eventDate);
    entry.setOutcome("OK");
    entry.setOutcomeDetail("LFC.ARCHIVAGE_PAC.OK");
    entry.setOutcomeDetailMessage(INGEST_EVENT_DETAIL_MESSAGE);

    journal.getEvent().add(entry);
    return journal;
}
/**
* Génère le bloc de métadonnées <dc:balise spécifique au<br/>
* services versants historiques.
......@@ -959,9 +934,12 @@ public class DCToSeda21Converter extends AbstractArchiveConverter {
* {@link DescriptiveMetadataContentType}
*/
private void mapCinesDc(DocDCType sip, DescriptiveMetadataContentType content) {
for (String publisher : sip.getPublisher()) {
JAXBElement<SimpleLiteral> publisherElement = createDublinCoreMetadata(publisher, "publisher");
content.getAny().add(publisherElement);
boolean generatePublisher = Boolean.parseBoolean(properties.getProperty(PUBLISHER));
if(generatePublisher) {
for (String publisher : sip.getPublisher()) {
JAXBElement<SimpleLiteral> publisherElement = createDublinCoreMetadata(publisher, "publisher");
content.getAny().add(publisherElement);
}
}
if(sip.getDate().equals("s.d.".trim()))
......@@ -1057,41 +1035,6 @@ public class DCToSeda21Converter extends AbstractArchiveConverter {
AppraisalRuleType appraisalRule = objectFactory.createAppraisalRuleType();
String appraisalRuleProperty = properties.getProperty(APPRAISAL_RULE);
/*//Bloc de code qu'il faudra probablement supprimer
// Règles d'évaluation - DUA et sort final
if (docMetaType.getSortFinal() != null) {
String sortFinal = docMetaType.getSortFinal().getValue();
FinalActionAppraisalCodeType finalActionCodeType = null;
if(sortFinal.equals("Conservation définitive")) {
finalActionCodeType = FinalActionAppraisalCodeType.KEEP;
} else {
finalActionCodeType = FinalActionAppraisalCodeType.DESTROY;
}
appraisalRule.setFinalAction(finalActionCodeType);
if(!appraisalRuleProperty.equals("false")) {
RuleIdType ruleIdType = objectFactory.createRuleIdType();
ruleIdType.setValue(appraisalRuleProperty);
XMLGregorianCalendar startDate = null;
try {
startDate = getXMLGregorianCalendar(docMetaType.getDateArchivage(), "yyyy-MM-dd'T'HH:mm:ssX");
//startDate = getXMLGregorianCalendar(docMetaType.getEvaluation().getDateDebut(), "yyyy-MM-dd'T'HH:mm:ssX");
//startDateFormated = getDateFormated(startDate, "yyyy-MM-dd");
} catch (ParseException e) {
e.printStackTrace();
} catch (DatatypeConfigurationException e) {
e.printStackTrace();
}
appraisalRule.getRuleAndStartDate().add(ruleIdType);
appraisalRule.getRuleAndStartDate().add(startDate);
}
}*/
if (docMetaType.getEvaluation() != null) {
if("false".equals(appraisalRuleProperty)) {
......@@ -1201,12 +1144,12 @@ public class DCToSeda21Converter extends AbstractArchiveConverter {
if(!isOnlyManifest()) {
try {
validator.setFile(filePath.toFile());
} catch (UnknownHostException e) {
logger.error("Identification impossible du format du fichier {}", document.getNomFichier());
} catch (IOException e) {
logger.error("problème d'entrée/sortie lecture/écriture sur le fichier {}", document.getNomFichier());
} catch (UnknownFormatException e) {
logger.error("Impossible d'identifier le format du fichier {}", document.getNomFichier());
} catch (Exception e) {
logger.error("Identification impossible du format du fichier {}", document.getNomFichier());
}
format = (ValidableFormat) validator.getIdentifiedFormat();
......@@ -1425,27 +1368,6 @@ public class DCToSeda21Converter extends AbstractArchiveConverter {
}
/**
 * Computes the checksum of a file and returns it as an upper-case hexadecimal string.
 * <p>
 * Supported algorithm names (case-insensitive): {@code MD5},
 * {@code SHA-256}/{@code SHA256} and {@code SHA-512}/{@code SHA512}.
 * Failures are logged and reported to the caller as {@code null}.
 *
 * @param fileName path of the file to hash
 * @param type     name of the digest algorithm
 * @return the upper-case hexadecimal digest, or {@code null} when the algorithm is
 *         missing or unsupported, or when the file cannot be read
 */
private String computeChecksum(String fileName, String type) {
    if (type == null) {
        // Avoid a NullPointerException on a missing algorithm name.
        logger.error("type de checksum non supporté {}", type);
        return null;
    }
    // Locale.ROOT keeps the normalisation independent of the JVM default locale.
    final String algorithm = type.toUpperCase(Locale.ROOT);
    try (InputStream data = new FileInputStream(fileName)) {
        final String hex;
        if ("MD5".equals(algorithm)) {
            hex = DigestUtils.md5Hex(data);
        } else if ("SHA-512".equals(algorithm) || "SHA512".equals(algorithm)) {
            hex = DigestUtils.sha512Hex(data);
        } else if ("SHA-256".equals(algorithm) || "SHA256".equals(algorithm)) {
            hex = DigestUtils.sha256Hex(data);
        } else {
            logger.error("type de checksum non supporté {}", algorithm);
            return null;
        }
        return hex.toUpperCase(Locale.ROOT);
    } catch (IOException e) {
        // FileNotFoundException is an IOException: one handler covers both cases,
        // and passing the exception keeps the cause in the log.
        logger.error("Impossible d'accéder au fichier {}", fileName, e);
        return null;
    }
}
/**
* Mappe les informations d'un fichier<br/>
* Le nom du fichier SEDA 2.1 ne contient pas le répertoire racine Content<br/>
......
......@@ -35,17 +35,15 @@ import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import fr.cines.ip.DataObject;
import fr.cines.ip.InformationPackage;
import fr.cines.pac.converter.exception.DuplicateLanguageException;
import fr.cines.pac.ingest.util.exception.IngestRejectException;
import fr.cines.pac.service.ingest.xml.SedaConstants;
import fr.cines.pac.storage.tar.TarMaker;
import fr.cines.pac.storage.tar.TarMakerImpl;
import fr.gouv.culture.archivesdefrance.seda.v2.ArchiveTransferType;
import fr.gouv.culture.archivesdefrance.seda.v2.DataObjectGroupType;
......@@ -58,13 +56,11 @@ import fr.gouv.culture.archivesdefrance.seda.v2.DataObjectGroupType;
*/
public class Seda1ToSeda21TarMaker implements ArchiveConverter {
/**
* L'utilitaire pour créer des tar
*/
private TarMaker tarMaker;
private static String agency;
private String outputDir;
private String errorLanguageDir;
private static Path propertiesFolder;
private String sipFileName;
......@@ -72,7 +68,7 @@ public class Seda1ToSeda21TarMaker implements ArchiveConverter {
public static Logger logger = LoggerFactory.getLogger(Seda1ToSeda21TarMaker.class);
/**
 * Creates the converter and sets {@code sipFileName} to {@code ArchiveTransfer.xml}.
 * <p>
 * NOTE(review): the {@code agency} argument is not read anywhere in this body —
 * confirm whether it should be stored. The result of the second
 * {@code new TarMakerImpl()} is discarded.
 *
 * @param agency agency value supplied by the caller (unused in this body)
 */
public Seda1ToSeda21TarMaker(String agency) {
tarMaker = new TarMakerImpl();
new TarMakerImpl();
this.sipFileName = "ArchiveTransfer.xml";
}
......@@ -104,10 +100,7 @@ public class Seda1ToSeda21TarMaker implements ArchiveConverter {
*/
List<File> fileList = new ArrayList<File>();
/**
* Nom du tar, reprends le nom du répertoire versé
*/
String targetName = rootFolderSEDA1.getParent().relativize(rootFolderSEDA1).toString();
rootFolderSEDA1.getParent().relativize(rootFolderSEDA1).toString();
fileList.add(manifest);
......@@ -118,17 +111,18 @@ public class Seda1ToSeda21TarMaker implements ArchiveConverter {
ArchiveTransferType archive = xmlConverter.convertManifest(archiveTransfer, manifest, agency, propertiesFolder);
// On renomme le répertoire racine en Content
renameContentFolder(rootFolderSEDA1);
moveArchiveXML(rootFolderSEDA1, ((Seda1ToSeda21Converter) xmlConverter).getArchiveType());
for (Object dataObject : archive.getDataObjectPackage().getDataObjectGroupOrBinaryDataObjectOrPhysicalDataObject()) {
if (dataObject instanceof DataObjectGroupType) {
DataObjectGroupType dog = (DataObjectGroupType) dataObject;
BinaryDataObjectTypeWithType binary = (BinaryDataObjectTypeWithType) dog.getBinaryDataObjectOrPhysicalDataObject().get(0);
Path uri = Paths.get(binary.getUri());
Path dataObjectPath = uri.getName(0).relativize(uri); // dir/file/txt
Path correctedDataObjectPath = Paths.get(AbstractArchiveConverter.CONTENT_FOLDER).resolve(dataObjectPath);
Path correctedDataObjectPath;
correctedDataObjectPath = Paths.get(rootFolderSEDA1.resolve("Content").resolve(uri.getFileName()).toString());
logger.debug("Chemin corrigé " + correctedDataObjectPath);
Path filePath = rootFolderSEDA1.resolve(correctedDataObjectPath);
if (!Files.exists(filePath))
logger.error("Le fichier {} n'existe pas.", rootFolderSEDA1.resolve(dataObjectPath));
logger.error("Le fichier {} n'existe pas.", filePath);
fileList.add(filePath.toFile());
logger.debug("Fichier ajouté à la liste {}", filePath);
}
......@@ -138,13 +132,32 @@ public class Seda1ToSeda21TarMaker implements ArchiveConverter {
logger.error("Impossible de trouver le fichier Archive.xml : chemin {}", archiveTransfer.getAbsolutePath());
}
logger.info("Création du tar {}", targetName);
try {
File tarFile = tarMaker.tarFiles(fileList, targetName, rootFolderSEDA1.toString(), rootFolderSEDA1.toString());
return tarFile;
Path zipPath = null;
if ("".equals(this.outputDir)) {
zipPath = rootFolderSEDA1.resolveSibling(rootFolderSEDA1.getFileName() + ".zip");
} else {
zipPath = Paths.get(this.outputDir, rootFolderSEDA1.getFileName().toString()).resolveSibling(rootFolderSEDA1.getFileName() + ".zip");
}
logger.info("Création de l'archive SEDA 2.1 {}", zipPath);
AbstractArchiveConverter.zip(fileList, zipPath, rootFolderSEDA1);
// Nettoyage
Files.delete(rootFolderSEDA1.resolve(AbstractArchiveConverter.MANIFEST_FILE_NAME));
Files.walk(rootFolderSEDA1)
.sorted(Comparator.reverseOrder())
.map(Path::toFile)
.forEach(File::delete);
return null;
} catch (IOException e) {
e.printStackTrace();
} catch (Exception e) {
e.printStackTrace();
}
return null;
}
......@@ -170,16 +183,32 @@ public class Seda1ToSeda21TarMaker implements ArchiveConverter {
}
}
/**
 * Moves Archive.xml or ArchiveTransfer.xml from the root folder into its
 * {@code Content} sub-folder, replacing any file of the same name already there.
 * <p>
 * Failures are logged and not propagated to the caller.
 *
 * @param rootFolderSEDA1 root folder against which both paths are resolved
 * @param archiveType     file name of the descriptor to move
 */
private void moveArchiveXML(Path rootFolderSEDA1, String archiveType) {
    Path archivePath = rootFolderSEDA1.resolve(archiveType);
    if (!Files.exists(archivePath)) {
        logger.error("Le fichier {} n'existe pas.", archivePath);
        // Nothing to move: attempting it would only fail and log a second error.
        return;
    }
    Path archiveContentPath = rootFolderSEDA1.resolve("Content").resolve(archiveType);
    try {
        Files.move(archivePath, archiveContentPath, StandardCopyOption.REPLACE_EXISTING);
    } catch (IOException e) {
        // Pass the exception so the underlying cause appears in the log.
        logger.error("Impossible de déplacer le fichier {} dans Content", archivePath, e);
    }
}
/**
 * Stores the given directory in the {@code outputDir} field.
 *
 * @param dir output directory path
 */
@Override
public void setOutputDir(String dir) {
this.outputDir = dir;
}
/**
 * Returns the value of the {@code outputDir} field.
 *
 * @return the stored output directory, or {@code null} if none has been set
 */
@Override
public String getOutputDir() {
    return this.outputDir;
}
......@@ -195,9 +224,10 @@ public class Seda1ToSeda21TarMaker implements ArchiveConverter {
}
/**
 * Returns the value of the {@code errorLanguageDir} field.
 *
 * @return the stored error-language directory, or {@code null} if none has been set
 */
public String getErrorLanguageDir() {
    return this.errorLanguageDir;
}
/**
 * Stores the given value in the {@code errorLanguageDir} field.
 *
 * @param errorLanguageDir directory path to store
 */
public void setErrorLanguageDir(String errorLanguageDir) {
this.errorLanguageDir = errorLanguageDir;
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment