1 package pl.psnc.dl.ege.tei;
2
3 import java.io.File;
4 import java.io.FileInputStream;
5 import java.io.FileOutputStream;
6 import java.io.IOException;
7 import java.io.InputStream;
8 import java.io.OutputStream;
9 import java.util.List;
10 import java.util.UUID;
11 import java.util.regex.Pattern;
12
13 import javax.xml.transform.stream.StreamSource;
14
15 import net.sf.saxon.s9api.Processor;
16 import net.sf.saxon.s9api.QName;
17 import net.sf.saxon.s9api.SaxonApiException;
18 import net.sf.saxon.s9api.Serializer;
19 import net.sf.saxon.s9api.XdmAtomicValue;
20 import net.sf.saxon.s9api.XsltCompiler;
21 import net.sf.saxon.s9api.XsltExecutable;
22 import net.sf.saxon.s9api.XsltTransformer;
23
24 import org.apache.log4j.Logger;
25 import org.tei.exceptions.ConfigurationException;
26 import org.tei.tei.DocXTransformationProperties;
27 import org.tei.utils.SaxonProcFactory;
28
29 import pl.psnc.dl.ege.component.Converter;
30 import pl.psnc.dl.ege.configuration.EGEConfigurationManager;
31 import pl.psnc.dl.ege.configuration.EGEConstants;
32 import pl.psnc.dl.ege.exception.ConverterException;
33 import pl.psnc.dl.ege.types.ConversionActionArguments;
34 import pl.psnc.dl.ege.types.DataType;
35 import pl.psnc.dl.ege.utils.EGEIOUtils;
36 import pl.psnc.dl.ege.utils.IOResolver;
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51 public class TEIConverter implements Converter {
52
53 private static final String EX_NO_FILE_DATA_WAS_FOUND = "No file data was found for conversion";
54
55 private static final Logger LOGGER = Logger.getLogger(TEIConverter.class);
56
57 public static final String DOCX_ERROR = "Probably trying to convert from DocX with wrong input.";
58
59 private IOResolver ior = EGEConfigurationManager.getInstance()
60 .getStandardIOResolver();
61
62 public void convert(InputStream inputStream, OutputStream outputStream,
63 final ConversionActionArguments conversionDataTypes)
64 throws ConverterException, IOException {
65 boolean found = false;
66 try {
67 for (ConversionActionArguments cadt : ConverterConfiguration.CONVERSIONS) {
68 if (conversionDataTypes.equals(cadt)) {
69 String profile = cadt.getProperties().get(
70 ConverterConfiguration.PROFILE_KEY);
71 LOGGER.debug("Converting from : "
72 + conversionDataTypes.getInputType().toString()
73 + " to "
74 + conversionDataTypes.getOutputType().toString());
75 LOGGER.debug("Selected profile : " + profile);
76 convertTo(inputStream, outputStream, cadt.getOutputType(),
77 profile);
78 found = true;
79 }
80 }
81 } catch (ConfigurationException ex) {
82 LOGGER.error(ex.getMessage(), ex);
83 throw new ConverterException(ex.getMessage());
84 } catch (SaxonApiException ex) {
85
86 if (ex.getMessage() != null
87 && ex.getMessage().contains("FileNotFoundException")
88 && conversionDataTypes.getInputType().getFormat().equals(
89 Format.DOCX.getFormatName())
90 && conversionDataTypes.getInputType().getMimeType().equals(
91 Format.DOCX.getMimeType())) {
92 LOGGER.warn(ex.getMessage(), ex);
93 throw new ConverterException(DOCX_ERROR);
94 }
95 LOGGER.error(ex.getMessage(), ex);
96 throw new ConverterException(ex.getMessage());
97 }
98 if (!found) {
99 throw new ConverterException(
100 ConverterException.UNSUPPORTED_CONVERSION_TYPES);
101 }
102 }
103
104
105
106
107 private void convertTo(InputStream inputStream, OutputStream outputStream,
108 DataType dataType, String profile) throws IOException,
109 SaxonApiException, ConfigurationException, ConverterException {
110 String toMimeType = dataType.getMimeType();
111
112 if (Format.DOCX.getMimeType().equals(toMimeType)) {
113 if (!ConverterConfiguration.checkProfile(profile, Format.DOCX
114 .getProfile())) {
115 LOGGER.warn(ConverterConfiguration.PROFILE_NOT_FOUND_MSG);
116 profile = ConverterConfiguration.DEFAULT_PROFILE;
117 }
118 Processor proc = SaxonProcFactory.getProcessor();
119 XsltCompiler comp = proc.newXsltCompiler();
120 transformToDocX(inputStream, outputStream, proc, comp, profile);
121 }
122
123 else if (Format.XHTML.getMimeType().equals(toMimeType)) {
124 if (!ConverterConfiguration.checkProfile(profile, Format.DOCX
125 .getProfile())) {
126 LOGGER.warn(ConverterConfiguration.PROFILE_NOT_FOUND_MSG);
127 profile = ConverterConfiguration.DEFAULT_PROFILE;
128 }
129
130 performXsltTransformation(inputStream, outputStream, Format.XHTML
131 .getProfile(), profile);
132 }
133
134 else if (Format.LATEX.getMimeType().equals(toMimeType)) {
135 if (!ConverterConfiguration.checkProfile(profile, Format.DOCX
136 .getProfile())) {
137 LOGGER.warn(ConverterConfiguration.PROFILE_NOT_FOUND_MSG);
138 profile = ConverterConfiguration.DEFAULT_PROFILE;
139 }
140 performXsltTransformation(inputStream, outputStream, Format.LATEX
141 .getProfile(), profile);
142 }
143
144 else if (Format.FO.getMimeType().equals(toMimeType)
145 && Format.FO.getFormatName().equals(dataType.getFormat())) {
146 if (!ConverterConfiguration.checkProfile(profile, Format.DOCX
147 .getProfile())) {
148 LOGGER.warn(ConverterConfiguration.PROFILE_NOT_FOUND_MSG);
149 profile = ConverterConfiguration.DEFAULT_PROFILE;
150 }
151 performXsltTransformation(inputStream, outputStream, Format.FO
152 .getProfile(), profile);
153 }
154
155 else if (ConverterConfiguration.XML_MIME.equals(toMimeType)
156 && dataType.getFormat().equals(ConverterConfiguration.TEI)) {
157 if (!ConverterConfiguration.checkProfile(profile, Format.DOCX
158 .getProfile())) {
159 LOGGER.warn(ConverterConfiguration.PROFILE_NOT_FOUND_MSG);
160 profile = ConverterConfiguration.DEFAULT_PROFILE;
161 }
162 transformFromDocX(inputStream, outputStream, profile);
163 }
164 }
165
166
167
168
169 private InputStream prepareInputData(InputStream inputStream, File inTempDir)
170 throws IOException, ConverterException {
171 ior.decompressStream(inputStream, inTempDir);
172
173 File sFile = searchForData(inTempDir, "^.*\\.((?i)xml)$");
174 if (sFile == null) {
175
176 sFile = searchForData(inTempDir, "^.*");
177 if (sFile == null) {
178 throw new ConverterException(EX_NO_FILE_DATA_WAS_FOUND);
179 }
180 }
181 FileInputStream fis = new FileInputStream(sFile);
182 return fis;
183 }
184
185
186
187
188 private File searchForData(File dir, String regex) {
189 for (File f : dir.listFiles()) {
190 if (!f.isDirectory() && Pattern.matches(regex, f.getName())) {
191 return f;
192 } else if (f.isDirectory()) {
193 File sf = searchForData(f, regex);
194 if (sf != null) {
195 return sf;
196 }
197 }
198 }
199 return null;
200 }
201
202 private File prepareTempDir() {
203 File inTempDir = null;
204 String uid = UUID.randomUUID().toString();
205 inTempDir = new File(EGEConstants.TEMP_PATH + File.separator + uid
206 + File.separator);
207 inTempDir.mkdir();
208 return inTempDir;
209 }
210
211
212
213
214 private void performXsltTransformation(InputStream inputStream,
215 OutputStream outputStream, String id, String profile)
216 throws IOException, SaxonApiException, ConverterException {
217 FileOutputStream fos = null;
218 InputStream is = null;
219 File inTmpDir = null;
220 File outTempDir = null;
221 try {
222 inTmpDir = prepareTempDir();
223 is = prepareInputData(inputStream, inTmpDir);
224 outTempDir = prepareTempDir();
225 File resFile = new File(outTempDir + File.separator + "res.ege");
226 fos = new FileOutputStream(resFile);
227 Processor proc = SaxonProcFactory.getProcessor();
228 XsltCompiler comp = proc.newXsltCompiler();
229 XsltExecutable exec = comp.compile(resolveConfiguration(id, comp,
230 profile));
231 XsltTransformer transformer = exec.load();
232 setTransformationParameters(transformer, id);
233 transformer.setInitialContextNode(proc.newDocumentBuilder().build(
234 new StreamSource(is)));
235 Serializer result = new Serializer();
236 result.setOutputStream(fos);
237 transformer.setDestination(result);
238 transformer.transform();
239 ior.compressData(outTempDir, outputStream);
240 } finally {
241 try {
242 is.close();
243 } catch (Exception ex) {
244
245 }
246 try {
247 fos.close();
248 } catch (Exception ex) {
249
250 }
251 if (outTempDir != null && outTempDir.exists())
252 EGEIOUtils.deleteDirectory(outTempDir);
253 if (inTmpDir != null && inTmpDir.exists())
254 EGEIOUtils.deleteDirectory(inTmpDir);
255 }
256
257 }
258
259
260
261
262 private void setTransformationParameters(XsltTransformer transformer,
263 String id) {
264 if (Format.XHTML.getId().equals(id)) {
265 transformer.setParameter(new QName("STDOUT"), new XdmAtomicValue(
266 "true"));
267 transformer.setParameter(new QName("splitLevel"),
268 new XdmAtomicValue("-1"));
269 transformer.setParameter(new QName("lang"),
270 new XdmAtomicValue("en"));
271 transformer.setParameter(new QName("doclang"), new XdmAtomicValue(
272 "en"));
273 transformer.setParameter(new QName("documentationLanguage"),
274 new XdmAtomicValue("en"));
275 }
276 }
277
278
279
280
281 private void transformFromDocX(InputStream is, OutputStream os,
282 String profile) throws IOException, SaxonApiException,
283 ConfigurationException, ConverterException {
284 File tmpDir = prepareTempDir();
285 InputStream fis = null;
286 DocXConverter docX = null;
287 try {
288 docX = new DocXConverter(getDocXConfig(profile), true);
289 ior.decompressStream(is, tmpDir);
290
291 File docXFile = searchForData(tmpDir, "^.*\\.((?i)doc|(?i)docx)$");
292 if (docXFile == null) {
293 docXFile = searchForData(tmpDir, "^.*");
294 if (docXFile == null) {
295 throw new ConverterException(EX_NO_FILE_DATA_WAS_FOUND);
296 }
297 }
298 fis = new FileInputStream(docXFile);
299 docX.docXToTEI(fis, os);
300 } finally {
301 if(fis != null){
302 try{
303 fis.close();
304 }catch(Exception ex){
305
306 }
307 }
308 if (tmpDir != null) {
309 EGEIOUtils.deleteDirectory(tmpDir);
310 }
311 if(docX != null){
312 docX.cleanUp();
313 }
314 }
315 }
316
317
318
319
320 private void transformToDocX(InputStream is, OutputStream os,
321 Processor proc, XsltCompiler comp, final String profile)
322 throws IOException, SaxonApiException, ConfigurationException,
323 ConverterException {
324 File inTmpDir = prepareTempDir();
325 File outTmpDir = prepareTempDir();
326 InputStream inputStream = prepareInputData(is, inTmpDir);
327 DocXConverter docX = null;
328 FileOutputStream fos = null;
329 try {
330 docX = new DocXConverter(getDocXConfig(profile));
331
332 docX.mergeTEI(proc.newDocumentBuilder().build(
333 new StreamSource(inputStream)));
334 File oDocXFile = new File(outTmpDir.getAbsolutePath() + File.separator + "result.docx");
335 fos = new FileOutputStream(oDocXFile);
336
337 docX.zipToStream(fos, new File(docX.getDirectoryName()));
338
339 ior.compressData(outTmpDir, os);
340
341 } finally {
342
343 try{
344 inputStream.close();
345 }catch(Exception ex){
346
347 }
348 if(fos != null){
349 try{
350 fos.close();
351 }catch(Exception ex){
352
353 }
354 }
355 if(docX != null){
356 docX.cleanUp();
357 }
358 EGEIOUtils.deleteDirectory(inTmpDir);
359 EGEIOUtils.deleteDirectory(outTmpDir);
360 }
361 }
362
363 private DocXTransformationProperties getDocXConfig(final String profile) {
364
365 final String tmpDir = new File(ConverterConfiguration.PATH).toString()
366 + File.separator + "tei-config" + File.separator + "tmp";
367
368 return new DocXTransformationProperties() {
369
370 @Override
371 public File getOutputFile() {
372 return null;
373 }
374
375 public String docx_pp_getDocXTemplateFile() {
376 return new File(ConverterConfiguration.STYLESHEETS_PATH)
377 .toString()
378 + File.separator
379 + "profiles"
380 + File.separator
381 + profile
382 + File.separator
383 + Format.DOCX.getId()
384 + File.separator + "template.docx";
385 }
386
387 public String docx_pp_getStylesheetCheckDocx() {
388 return null;
389 }
390
391 public String docx_pp_getStylesheetDocx2TEI() {
392 return new File(ConverterConfiguration.STYLESHEETS_PATH)
393 .toString()
394 + File.separator
395 + "profiles"
396 + File.separator
397 + profile
398 + File.separator
399 + Format.DOCX.getId()
400 + File.separator + "from.xsl";
401 }
402
403 public String docx_pp_getStylesheetNormalizeWordStyles() {
404 return new File(ConverterConfiguration.STYLESHEETS_PATH)
405 .toString()
406 + File.separator
407 + Format.DOCX.getId()
408 + File.separator
409 + "normalize-word-style.xsl";
410 }
411
412 public String docx_pp_getStylesheetTEI2Docx() {
413 return new File(ConverterConfiguration.STYLESHEETS_PATH)
414 .toString()
415 + File.separator
416 + "profiles"
417 + File.separator
418 + profile
419 + File.separator
420 + Format.DOCX.getId()
421 + File.separator + "to.xsl";
422 }
423
424 public String docx_pp_getTempDir() {
425 return tmpDir;
426 }
427 };
428 }
429
430
431
432
433
434 private StreamSource resolveConfiguration(final String id,
435 XsltCompiler comp, String profile) throws IOException {
436 comp.setURIResolver(TEIConverterURIResolver
437 .newInstance(ConverterConfiguration.PATH + "/" + "tei-config"
438 + "/" + "stylesheets" + "/" + "profiles" + "/"
439 + profile + "/" + id));
440 return new StreamSource(new FileInputStream(new File(
441 ConverterConfiguration.STYLESHEETS_PATH + "profiles"
442 + File.separator + profile + File.separator + id
443 + File.separator + "to.xsl")));
444 }
445
446 public List<ConversionActionArguments> getPossibleConversions() {
447 return (List<ConversionActionArguments>) ConverterConfiguration.CONVERSIONS;
448 }
449
450 }