이전에 IndexCommit에 대해서 잠깐 확인을 해보았었는데요..
이번에는 IndexDeletionPolicy에 대해서 확인해보려고 합니다.
기본적으로 제공되는 IndexDeletionPolicy의 구현 클래스로는
KeepOnlyLastCommitDeletionPolicy와 SnapshotDeletionPolicy가 있습니다.
KeepOnlyLastCommitDeletionPolicy가 기본이구요.
commit을 할때마다 commit의 정보를 가지고있는 IndexCommit 객체가 생성되는데 이 IndexCommit을
어떻게 할것이냐의 정책이라고 보시면 좋을 것 같습니다.
우선 기본적인 IndexDeletionPolicy에서 제공되는 메서드가 2개가 있습니다. 하나는 onInit 메서드, 다른 하나는 onCommit 메서드입니다. onInit 메서드는 IndexWriter가 처음 생성될 때 기존 commit list를 넘겨받고, onCommit 메서드는 commit 할 때마다 그 시점까지의 commit list를 넘겨받습니다. KeepOnlyLastCommitDeletionPolicy에서는 onInit 메서드가 넘겨받은 commit list를
그대로 onCommit 메서드로 넘겨서 제일 마지막 Commit만 남기고 삭제하도록 되어있습니다.
package com.tistory.devyongsik.policy; | |
import java.io.IOException; | |
import java.util.List; | |
import org.apache.lucene.analysis.Analyzer; | |
import org.apache.lucene.analysis.standard.StandardAnalyzer; | |
import org.apache.lucene.document.Document; | |
import org.apache.lucene.document.Field; | |
import org.apache.lucene.document.FieldType; | |
import org.apache.lucene.index.DirectoryReader; | |
import org.apache.lucene.index.FieldInfo.IndexOptions; | |
import org.apache.lucene.index.IndexCommit; | |
import org.apache.lucene.index.IndexDeletionPolicy; | |
import org.apache.lucene.index.IndexWriter; | |
import org.apache.lucene.index.IndexWriterConfig; | |
import org.apache.lucene.index.IndexWriterConfig.OpenMode; | |
import org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy; | |
import org.apache.lucene.index.SnapshotDeletionPolicy; | |
import org.apache.lucene.index.Term; | |
import org.apache.lucene.store.Directory; | |
import org.apache.lucene.store.RAMDirectory; | |
import org.apache.lucene.util.Version; | |
import org.junit.Test; | |
public class PolicyTest { | |
@Test | |
public void keepOnlyLastCommitDeletionPolicyTest() throws IOException { | |
String a = "learning perl learning java learning ruby"; | |
String b = "perl test t"; | |
String c = "perl test t learning"; | |
Directory dir = new RAMDirectory(); | |
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_42); //문서 내용을 분석 할 때 사용 될 Analyzer | |
IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_42, analyzer); | |
iwc.setOpenMode(OpenMode.CREATE); | |
IndexDeletionPolicy myPolicy = new MyPolicy(); | |
iwc.setIndexDeletionPolicy(myPolicy); | |
IndexWriter writer = new IndexWriter(dir, iwc); //8. 드디어 IndexWriter를 생성합니다. | |
Document doc1 = new Document(); | |
FieldType f1type = new FieldType(); | |
f1type.setIndexed(true); | |
f1type.setStored(false); | |
f1type.setTokenized(true); | |
f1type.setStoreTermVectors(true); | |
f1type.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); | |
Field f1 = new Field("f", a, f1type); | |
doc1.add(f1); | |
writer.addDocument(doc1); | |
writer.commit(); | |
DirectoryReader directoryReader = DirectoryReader.open(dir); | |
IndexCommit indexCommit = directoryReader.getIndexCommit(); | |
System.out.println(indexCommit.getGeneration()); | |
System.out.println(indexCommit.getSegmentCount()); | |
System.out.println(indexCommit.getSegmentsFileName()); | |
System.out.println(indexCommit.getFileNames()); | |
System.out.println(indexCommit.isDeleted()); | |
Document doc2 = new Document(); | |
Field f2 = new Field("f", b, f1type); | |
doc2.add(f2); | |
writer.addDocument(doc2); | |
writer.commit(); | |
DirectoryReader directoryReader2 = DirectoryReader.open(dir); | |
IndexCommit indexCommit2 = directoryReader2.getIndexCommit(); | |
System.out.println(indexCommit2.getGeneration()); | |
System.out.println(indexCommit2.getSegmentCount()); | |
System.out.println(indexCommit2.getSegmentsFileName()); | |
System.out.println(indexCommit2.getFileNames()); | |
System.out.println(indexCommit2.isDeleted()); | |
Document doc3 = new Document(); | |
Field f3 = new Field("f", c, f1type); | |
doc3.add(f3); | |
writer.addDocument(doc3); | |
writer.commit(); | |
DirectoryReader directoryReader3 = DirectoryReader.open(dir); | |
IndexCommit indexCommit3 = directoryReader3.getIndexCommit(); | |
System.out.println(indexCommit3.getGeneration()); | |
System.out.println(indexCommit3.getSegmentCount()); | |
System.out.println(indexCommit3.getSegmentsFileName()); | |
System.out.println(indexCommit3.getFileNames()); | |
System.out.println(indexCommit3.isDeleted()); | |
//delete | |
Term t = new Term("f", "java"); | |
writer.deleteDocuments(t); | |
writer.commit(); | |
DirectoryReader directoryReader4 = DirectoryReader.open(dir); | |
IndexCommit indexCommit4 = directoryReader4.getIndexCommit(); | |
System.out.println(indexCommit4.getGeneration()); | |
System.out.println(indexCommit4.getSegmentCount()); | |
System.out.println(indexCommit4.getSegmentsFileName()); | |
System.out.println(indexCommit4.getFileNames()); | |
System.out.println(indexCommit4.isDeleted()); | |
writer.close(); | |
} | |
private class MyPolicy implements IndexDeletionPolicy { | |
/** Sole constructor. */ | |
public MyPolicy() { | |
} | |
/** | |
* Deletes all commits except the most recent one. | |
*/ | |
@Override | |
public void onInit(List<? extends IndexCommit> commits) { | |
// Note that commits.size() should normally be 1: | |
onCommit(commits); | |
} | |
/** | |
* Deletes all commits except the most recent one. | |
*/ | |
@Override | |
public void onCommit(List<? extends IndexCommit> commits) { | |
// Note that commits.size() should normally be 2 (if not | |
// called by onInit above) | |
System.out.println("commits size : " + commits.size()); | |
System.out.println("commits : " + commits); | |
int size = commits.size(); | |
for(int i=0;i<size-1;i++) { | |
System.out.println("commits.get(i) : " + commits.get(i)); | |
System.out.println("seg count : " + commits.get(i).getSegmentCount()); | |
//commits.get(i).delete(); | |
} | |
} | |
} | |
} | |
//Result | |
commits size : 1 | |
commits : [IndexFileDeleter.CommitPoint(segments_1)] | |
1 | |
1 | |
segments_1 | |
[_0.fnm, _0_Lucene41_0.pos, _0.tvd, _0.nvm, _0_Lucene41_0.pay, _0_Lucene41_0.doc, _0.tvx, segments_1, _0.nvd, _0.fdx, _0.si, _0_Lucene41_0.tim, _0.fdt, _0_Lucene41_0.tip] | |
false | |
commits size : 2 | |
commits : [IndexFileDeleter.CommitPoint(segments_1), IndexFileDeleter.CommitPoint(segments_2)] | |
commits.get(i) : IndexFileDeleter.CommitPoint(segments_1) | |
seg count : 1 | |
2 | |
2 | |
segments_2 | |
[_1_Lucene41_0.doc, _0_Lucene41_0.pos, _1.fnm, _1_Lucene41_0.pay, _0.nvd, _0.si, _0_Lucene41_0.tim, _0_Lucene41_0.tip, _1.nvm, _1.tvx, _0.fnm, _1_Lucene41_0.tim, _0.tvd, _1.nvd, _1_Lucene41_0.tip, _0.nvm, _1_Lucene41_0.pos, _1.fdx, _0_Lucene41_0.pay, _0.tvx, _0_Lucene41_0.doc, _1.fdt, _1.si, segments_2, _0.fdx, _1.tvd, _0.fdt] | |
false | |
commits size : 3 | |
commits : [IndexFileDeleter.CommitPoint(segments_1), IndexFileDeleter.CommitPoint(segments_2), IndexFileDeleter.CommitPoint(segments_3)] | |
commits.get(i) : IndexFileDeleter.CommitPoint(segments_1) | |
seg count : 1 | |
commits.get(i) : IndexFileDeleter.CommitPoint(segments_2) | |
seg count : 2 | |
3 | |
3 | |
segments_3 | |
[_2.si, _2_Lucene41_0.pos, _1_Lucene41_0.doc, _2_Lucene41_0.tim, _0_Lucene41_0.pos, _2_Lucene41_0.tip, _1.fnm, _2.tvx, _2.tvd, _1_Lucene41_0.pay, _0.nvd, _0.si, _0_Lucene41_0.tim, _0_Lucene41_0.tip, _1.nvm, _1.tvx, _0.fnm, _1_Lucene41_0.tim, _0.tvd, _1.nvd, _2.fdt, _1_Lucene41_0.tip, _2_Lucene41_0.doc, _0.nvm, _2.fdx, _2.fnm, _1_Lucene41_0.pos, _1.fdx, _0_Lucene41_0.pay, _2.nvm, _2_Lucene41_0.pay, _0.tvx, _0_Lucene41_0.doc, _1.fdt, _1.si, _2.nvd, _0.fdx, _1.tvd, segments_3, _0.fdt] | |
false | |
commits size : 4 | |
commits : [IndexFileDeleter.CommitPoint(segments_1), IndexFileDeleter.CommitPoint(segments_2), IndexFileDeleter.CommitPoint(segments_3), IndexFileDeleter.CommitPoint(segments_4)] | |
commits.get(i) : IndexFileDeleter.CommitPoint(segments_1) | |
seg count : 1 | |
commits.get(i) : IndexFileDeleter.CommitPoint(segments_2) | |
seg count : 2 | |
commits.get(i) : IndexFileDeleter.CommitPoint(segments_3) | |
seg count : 3 | |
4 | |
2 | |
segments_4 | |
[_2_Lucene41_0.pos, _2.si, _1_Lucene41_0.doc, _2_Lucene41_0.tim, _2_Lucene41_0.tip, _1.fnm, _2.tvx, _2.tvd, _1_Lucene41_0.pay, _1.tvx, _1.nvm, _1_Lucene41_0.tim, _1.nvd, _2.fdt, _1_Lucene41_0.tip, _2_Lucene41_0.doc, _2.fdx, _2.fnm, _1.fdx, _1_Lucene41_0.pos, _2.nvm, _2_Lucene41_0.pay, _1.fdt, _1.si, _2.nvd, _1.tvd, segments_4] | |
false | |
이런 형태로 IndexDeletionPolicy를 구현하면 commit 시의 행동을 결정할 수도 있습니다.
이를 이용해서 replication도 적용이 가능하겠구요.. 백업도 가능하겠구요...
두번째는 SnapshotDeletionPolicy인데요, 이것은 사용자가 key로 현재의 IndexCommit을 저장해두는 (스냅샷을 만드는) 기능을 제공합니다. 인덱스 파일들이 백업되어 있다면 특정 시점으로의 복구 같은 것들도 가능할 것 같습니다.
package com.tistory.devyongsik.policy; | |
import java.io.IOException; | |
import java.util.List; | |
import org.apache.lucene.analysis.Analyzer; | |
import org.apache.lucene.analysis.standard.StandardAnalyzer; | |
import org.apache.lucene.document.Document; | |
import org.apache.lucene.document.Field; | |
import org.apache.lucene.document.FieldType; | |
import org.apache.lucene.index.DirectoryReader; | |
import org.apache.lucene.index.FieldInfo.IndexOptions; | |
import org.apache.lucene.index.IndexCommit; | |
import org.apache.lucene.index.IndexDeletionPolicy; | |
import org.apache.lucene.index.IndexWriter; | |
import org.apache.lucene.index.IndexWriterConfig; | |
import org.apache.lucene.index.IndexWriterConfig.OpenMode; | |
import org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy; | |
import org.apache.lucene.index.SnapshotDeletionPolicy; | |
import org.apache.lucene.index.Term; | |
import org.apache.lucene.store.Directory; | |
import org.apache.lucene.store.RAMDirectory; | |
import org.apache.lucene.util.Version; | |
import org.junit.Test; | |
public class PolicyTest { | |
@Test | |
public void snapShotDeletionPolicyTest() throws IOException { | |
String a = "learning perl learning java learning ruby"; | |
String b = "perl test t"; | |
String c = "perl test t learning"; | |
Directory dir = new RAMDirectory(); | |
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_42); //문서 내용을 분석 할 때 사용 될 Analyzer | |
IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_42, analyzer); | |
iwc.setOpenMode(OpenMode.CREATE); | |
IndexDeletionPolicy primaryPolicy = new KeepOnlyLastCommitDeletionPolicy(); | |
SnapshotDeletionPolicy snapShotPolicy = new SnapshotDeletionPolicy(primaryPolicy, null); | |
iwc.setIndexDeletionPolicy(snapShotPolicy); | |
IndexWriter writer = new IndexWriter(dir, iwc); //8. 드디어 IndexWriter를 생성합니다. | |
Document doc1 = new Document(); | |
FieldType f1type = new FieldType(); | |
f1type.setIndexed(true); | |
f1type.setStored(false); | |
f1type.setTokenized(true); | |
f1type.setStoreTermVectors(true); | |
f1type.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); | |
Field f1 = new Field("f", a, f1type); | |
doc1.add(f1); | |
writer.addDocument(doc1); | |
writer.commit(); | |
DirectoryReader directoryReader = DirectoryReader.open(dir); | |
IndexCommit indexCommit = directoryReader.getIndexCommit(); | |
System.out.println(indexCommit.getGeneration()); | |
System.out.println(indexCommit.getSegmentCount()); | |
System.out.println(indexCommit.getSegmentsFileName()); | |
System.out.println(indexCommit.getFileNames()); | |
System.out.println(indexCommit.isDeleted()); | |
snapShotPolicy.snapshot("segment1"); | |
Document doc2 = new Document(); | |
Field f2 = new Field("f", b, f1type); | |
doc2.add(f2); | |
writer.addDocument(doc2); | |
writer.commit(); | |
DirectoryReader directoryReader2 = DirectoryReader.open(dir); | |
IndexCommit indexCommit2 = directoryReader2.getIndexCommit(); | |
System.out.println(indexCommit2.getGeneration()); | |
System.out.println(indexCommit2.getSegmentCount()); | |
System.out.println(indexCommit2.getSegmentsFileName()); | |
System.out.println(indexCommit2.getFileNames()); | |
System.out.println(indexCommit2.isDeleted()); | |
snapShotPolicy.snapshot("segment2"); | |
System.out.println("snapShot : " + snapShotPolicy.getSnapshots()); | |
System.out.println("segment1 : " + snapShotPolicy.getSnapshot("segment1").getFileNames()); | |
System.out.println("segment1 : " + snapShotPolicy.getSnapshot("segment1").getSegmentsFileName()); | |
System.out.println("segment2 : " + snapShotPolicy.getSnapshot("segment2").getFileNames()); | |
System.out.println("segment2 : " + snapShotPolicy.getSnapshot("segment2").getSegmentsFileName()); | |
writer.close(); | |
} | |
} | |
//Result | |
1 | |
1 | |
segments_1 | |
[_0.fnm, _0_Lucene41_0.pos, _0.tvd, _0.nvm, _0_Lucene41_0.pay, _0_Lucene41_0.doc, _0.tvx, segments_1, _0.nvd, _0.fdx, _0.si, _0_Lucene41_0.tim, _0.fdt, _0_Lucene41_0.tip] | |
false | |
2 | |
2 | |
segments_2 | |
[_1_Lucene41_0.doc, _0_Lucene41_0.pos, _1.fnm, _1_Lucene41_0.pay, _0.nvd, _0.si, _0_Lucene41_0.tim, _0_Lucene41_0.tip, _1.nvm, _1.tvx, _0.fnm, _1_Lucene41_0.tim, _0.tvd, _1.nvd, _1_Lucene41_0.tip, _0.nvm, _1_Lucene41_0.pos, _1.fdx, _0_Lucene41_0.pay, _0.tvx, _0_Lucene41_0.doc, _1.fdt, _1.si, segments_2, _0.fdx, _1.tvd, _0.fdt] | |
false | |
snapShot : {segment1=segments_1, segment2=segments_2} | |
segment1 : [_0.fnm, _0_Lucene41_0.pos, _0.tvd, _0.nvm, _0_Lucene41_0.pay, _0_Lucene41_0.doc, _0.tvx, segments_1, _0.nvd, _0.fdx, _0.si, _0_Lucene41_0.tim, _0.fdt, _0_Lucene41_0.tip] | |
segment1 : segments_1 | |
segment2 : [_1_Lucene41_0.doc, _0_Lucene41_0.pos, _1.fnm, _1_Lucene41_0.pay, _0.nvd, _0.si, _0_Lucene41_0.tim, _0_Lucene41_0.tip, _1.nvm, _1.tvx, _0.fnm, _1_Lucene41_0.tim, _0.tvd, _1.nvd, _1_Lucene41_0.tip, _0.nvm, _1_Lucene41_0.pos, _1.fdx, _0_Lucene41_0.pay, _0.tvx, _0_Lucene41_0.doc, _1.fdt, _1.si, segments_2, _0.fdx, _1.tvd, _0.fdt] | |
segment2 : segments_2 |