View Javadoc

1   /*
2    * Copyright 2005 Jukka Zitting <jz@yukatan.fi>
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  package net.sf.exorcist.core;
17  
18  import java.io.IOException;
19  import java.io.InputStream;
20  import java.util.HashSet;
21  import java.util.Iterator;
22  import java.util.Set;
23  import java.util.regex.Matcher;
24  import java.util.regex.Pattern;
25  
26  import javax.xml.parsers.ParserConfigurationException;
27  import javax.xml.parsers.SAXParser;
28  import javax.xml.parsers.SAXParserFactory;
29  
30  import net.sf.exorcist.api.ContentConverter;
31  import net.sf.exorcist.api.ContentException;
32  import net.sf.exorcist.api.ContentState;
33  
34  import org.xml.sax.SAXException;
35  import org.xml.sax.helpers.DefaultHandler;
36  
37  /***
38   * TODO
39   */
40  public class TrimAttachmentsConverter implements ContentConverter {
41  
42      private static final Pattern PATTERN = Pattern.compile("[0-9a-f]{40}");
43      
44      /*** {@inheritDoc} */
45      public void convertContent(ContentState state) throws ContentException {
46          try {
47              final Set hashes = new HashSet(state.getAttachmentHashes());
48  
49              InputStream content = state.getContent();
50              try {
51                  SAXParserFactory factory = SAXParserFactory.newInstance();
52                  SAXParser parser = factory.newSAXParser();
53                  parser.parse(content, new DefaultHandler() {
54                      
55                      private StringBuffer buffer = new StringBuffer();
56                      
57                      public void characters(char[] ch, int start, int length) {
58                          buffer.append(ch, start, length);
59                      }
60                      
61                      public void endElement(String uri, String local, String name) {
62                          Matcher matcher = PATTERN.matcher(buffer);
63                          while (matcher.find()) {
64                              hashes.remove(matcher.group());
65                          }
66                          buffer.setLength(0);
67                      }
68                  });
69              } catch (SAXException e) {
70                  throw new ContentException(e);
71              } catch (ParserConfigurationException e) {
72                  throw new ContentException(e);
73              } finally {
74                  content.close();
75              }
76  
77              Iterator iterator = hashes.iterator();
78              while (iterator.hasNext()) {
79                  state.removeAttachment((String) iterator.next());
80              }
81          } catch (IOException e) {
82              throw new ContentException(e);
83          }
84      }
85  
86  }