1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package net.sf.exorcist.core;
17
18 import java.io.IOException;
19 import java.io.InputStream;
20 import java.util.HashSet;
21 import java.util.Iterator;
22 import java.util.Set;
23 import java.util.regex.Matcher;
24 import java.util.regex.Pattern;
25
26 import javax.xml.parsers.ParserConfigurationException;
27 import javax.xml.parsers.SAXParser;
28 import javax.xml.parsers.SAXParserFactory;
29
30 import net.sf.exorcist.api.ContentConverter;
31 import net.sf.exorcist.api.ContentException;
32 import net.sf.exorcist.api.ContentState;
33
34 import org.xml.sax.SAXException;
35 import org.xml.sax.helpers.DefaultHandler;
36
37 /***
38 * TODO
39 */
40 public class TrimAttachmentsConverter implements ContentConverter {
41
42 private static final Pattern PATTERN = Pattern.compile("[0-9a-f]{40}");
43
44 /*** {@inheritDoc} */
45 public void convertContent(ContentState state) throws ContentException {
46 try {
47 final Set hashes = new HashSet(state.getAttachmentHashes());
48
49 InputStream content = state.getContent();
50 try {
51 SAXParserFactory factory = SAXParserFactory.newInstance();
52 SAXParser parser = factory.newSAXParser();
53 parser.parse(content, new DefaultHandler() {
54
55 private StringBuffer buffer = new StringBuffer();
56
57 public void characters(char[] ch, int start, int length) {
58 buffer.append(ch, start, length);
59 }
60
61 public void endElement(String uri, String local, String name) {
62 Matcher matcher = PATTERN.matcher(buffer);
63 while (matcher.find()) {
64 hashes.remove(matcher.group());
65 }
66 buffer.setLength(0);
67 }
68 });
69 } catch (SAXException e) {
70 throw new ContentException(e);
71 } catch (ParserConfigurationException e) {
72 throw new ContentException(e);
73 } finally {
74 content.close();
75 }
76
77 Iterator iterator = hashes.iterator();
78 while (iterator.hasNext()) {
79 state.removeAttachment((String) iterator.next());
80 }
81 } catch (IOException e) {
82 throw new ContentException(e);
83 }
84 }
85
86 }