Hi,
my custom WrapperXValidation operator consumes too much memory when the attribute-weighted example set is cloned, which results in an OutOfMemoryError. I have a ConditionedExampleSet, created with a Condition, which is used for calculating the AttributeWeights. Then I create an AttributeWeightedExampleSet from it. The memory consumption increases from ~300 MB to over 1.2 GB. I have ~300 examples with ~2500 attributes. Any ideas what went wrong? The OOME occurs either when createCleanClone() is called or when the k-Nearest-Neighbour learner is applied (see the notes "//FAILS HERE" and "//OR HERE" in the source snippet). I don't have any problems if my custom XValidation operator (without attribute weighting) is used.
Snippet from custom WrapperXValidation:
// Custom WrapperXValidation loop: every split is held out once for testing,
// the remaining splits form the training data of that iteration.
for (final DocumentRDs heldOutDocs : splittedDocumentRDs) {
    // Training data = all splits except the held-out one.
    List<DocumentRDs> trainingSplits = new ArrayList<DocumentRDs>(splittedDocumentRDs);
    trainingSplits.remove(heldOutDocs);
    final DocumentRDs trainingDocs = new DocumentRDs(inputDocumentRDs.getFacet(), trainingSplits);
    trainingDocs.setFeatureNames(inputDocumentRDs.getFeatureNames());

    // Operator 0 yields a Model from the training documents only.
    Model indexingModel = getOperator(0).apply(new IOContainer(trainingDocs)).get(Model.class);

    // Operator 1 turns ALL input documents plus that model into one ExampleSet;
    // training and testing views are carved out of it below.
    ExampleSet exampleSet = getOperator(1)
            .apply(new IOContainer(inputDocumentRDs, indexingModel))
            .get(ExampleSet.class);

    // View restricted to examples whose id belongs to the training documents.
    ConditionedExampleSet trainingView = new ConditionedExampleSet(exampleSet, new Condition() {
        private static final long serialVersionUID = -501393443936335688L;

        private Set<String> ids = trainingDocs.getIds();

        @Override
        public boolean conditionOk(Example example) {
            return ids.contains(example.getNominalValue(example.getAttributes().getId()));
        }

        /** Not supported by this condition; always throws. */
        @Override
        @Deprecated
        public Condition duplicate() {
            throw new UnsupportedOperationException();
        }
    });

    // View restricted to examples whose id belongs to the held-out documents.
    ConditionedExampleSet testingView = new ConditionedExampleSet(exampleSet, new Condition() {
        private static final long serialVersionUID = -8328394200676917060L;

        private Set<String> ids = heldOutDocs.getIds();

        @Override
        public boolean conditionOk(Example example) {
            return ids.contains(example.getNominalValue(example.getAttributes().getId()));
        }

        /** Not supported by this condition; always throws. */
        @Override
        @Deprecated
        public Condition duplicate() {
            throw new UnsupportedOperationException();
        }
    });

    // Operator 2 computes attribute weights on the training view; they are also
    // handed to handleWeights together with the global weights.
    AttributeWeights attributeWeights = getOperator(2)
            .apply(new IOContainer(trainingView))
            .get(AttributeWeights.class);
    handleWeights(globalAttributeWeights, attributeWeights);

    // Wrap the training view with the weights and take a clean clone of it.
    AttributeWeightedExampleSet weightedTrainingView =
            new AttributeWeightedExampleSet(trainingView, attributeWeights, 0);
    ExampleSet cleanTrainingSet = weightedTrainingView.createCleanClone(); //FAILS HERE

    // Operator 3 builds the model from the cloned, weighted training set.
    Model model = getOperator(3).apply(new IOContainer(cleanTrainingSet)).get(Model.class); //OR HERE

    // Operator 4 receives the model and the (unweighted) testing view; its
    // results are passed to Tools.handleAverages.
    IOContainer evaluationResult = getOperator(4).apply(new IOContainer(model, testingView));
    Tools.handleAverages(evaluationResult, averageVectors, true);
    inApplyLoop();
}
Snippet from custom XValidation (no problem):
// Custom XValidation loop (no attribute weighting): every split is held out
// once for testing, the remaining splits form the training data.
for (final DocumentRDs heldOutDocs : splittedDocumentRDs) {
    // Training data = all splits except the held-out one.
    List<DocumentRDs> trainingSplits = new ArrayList<DocumentRDs>(splittedDocumentRDs);
    trainingSplits.remove(heldOutDocs);
    final DocumentRDs trainingDocs = new DocumentRDs(inputDocumentRDs.getFacet(), trainingSplits);
    trainingDocs.setFeatureNames(inputDocumentRDs.getFeatureNames());

    // Operator 0 yields a Model from the training documents only.
    Model indexingModel = getOperator(0).apply(new IOContainer(trainingDocs)).get(Model.class);

    // Operator 1 turns ALL input documents plus that model into one ExampleSet;
    // training and testing views are carved out of it below.
    ExampleSet exampleSet = getOperator(1)
            .apply(new IOContainer(inputDocumentRDs, indexingModel))
            .get(ExampleSet.class);

    // View restricted to examples whose id belongs to the training documents.
    ConditionedExampleSet trainingView = new ConditionedExampleSet(exampleSet, new Condition() {
        private static final long serialVersionUID = -501393443936335688L;

        private Set<String> ids = trainingDocs.getIds();

        @Override
        public boolean conditionOk(Example example) {
            return ids.contains(example.getNominalValue(example.getAttributes().getId()));
        }

        /** Not supported by this condition; always throws. */
        @Override
        @Deprecated
        public Condition duplicate() {
            throw new UnsupportedOperationException();
        }
    });

    // View restricted to examples whose id belongs to the held-out documents.
    ConditionedExampleSet testingView = new ConditionedExampleSet(exampleSet, new Condition() {
        private static final long serialVersionUID = -8328394200676917060L;

        private Set<String> ids = heldOutDocs.getIds();

        @Override
        public boolean conditionOk(Example example) {
            return ids.contains(example.getNominalValue(example.getAttributes().getId()));
        }

        /** Not supported by this condition; always throws. */
        @Override
        @Deprecated
        public Condition duplicate() {
            throw new UnsupportedOperationException();
        }
    });

    // Operator 2 builds the model directly from the training view.
    Model model = getOperator(2).apply(new IOContainer(trainingView)).get(Model.class);

    // Operator 3 receives the model and the testing view; its results are
    // passed to Tools.handleAverages.
    IOContainer evaluationResult = getOperator(3).apply(new IOContainer(model, testingView));
    Tools.handleAverages(evaluationResult, averageVectors, true);
    inApplyLoop();
}
Thanks