A program to recognize and reward our most engaged community members
/* * RapidMiner * * Copyright (C) 2001-2009 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */package com.rapidminer.operator.text.io.transformer;import java.util.ArrayList;import java.util.List;import com.rapidminer.operator.Operator;import com.rapidminer.operator.OperatorDescription;import com.rapidminer.operator.OperatorException;import com.rapidminer.operator.Value;import com.rapidminer.operator.ports.InputPortExtender;import com.rapidminer.operator.ports.OutputPort;import com.rapidminer.operator.text.Document;import com.rapidminer.operator.text.Token;/** * This operator combines serveral documents by appending their content to a new * document. The meta data will be added from all documents but the values of * the first documents will be overwritten by the values of the following. * * @author Tobias Malbrecht, Sebastian Land */public class CombineDocumentsOperator extends Operator { private InputPortExtender documentInputPorts = new InputPortExtender( "documents", getInputPorts()); private OutputPort documentOutput = getOutputPorts().createPort("document"); public CombineDocumentsOperator(OperatorDescription description) { super(description); documentInputPorts.start(); getTransformer().addGenerationRule(documentOutput, Document.class); }@Override public void doWork() throws OperatorException { List<Document> documents = documentInputPorts.getData(true); List<Token> tokens = new ArrayList<Token>(); Document result = new Document(tokens); //within this loop i observe the labelnames of the documents. if they entail a pattern like <label>_weigh_<weight> //i cast <weight> to float and i'm multiplying every token's weight with <weight> String[] splitted; for (Document document : documents) { String label = (String) document.getMetaDataValue("label"); splitted = label.split("_weight_"); if (splitted.length > 1) { List<Token> newSequence = new ArrayList<Token>(); float weight = Float.parseFloat(splitted[1]); List<Token> tseq = document.getTokenSequence(); for (Token token : tseq) { Token t = new Token(token.getToken(), token.getWeight() * weight); newSequence.add(t); System.out.println(t); } tokens.addAll(newSequence); } else { tokens.addAll(document.getTokenSequence()); } //this line is just for beauty document.addMetaData("label", splitted[0], document.getMetaDataType("label")); result.addMetaData(document); } documentOutput.deliver(result); }}