Quick way to extract all attribute names and write to a document?
Dear Community,
I want to de-pivot a wide table whose attribute names look like this:
Pages[0].Item.Version
Pages[0].Item.Language
Pages[0].Url.Path
Pages[0].VisitPageIndex
Pages[0].Duration
.
.
.
Pages[1].Item.Version
Pages[1].Item.Language
Pages[1].Url.Path
Pages[1].VisitPageIndex
Pages[1].Duration
I used a regex in 'Select Attributes' to filter the wanted attributes and then de-pivoted the table. But I feel it would be more convenient if I could copy all the attribute names from a flat document...
Thanks in advance!
Best Answer
-
I created a process, using Groovy, that makes a new example set containing the names and types of an existing example set. Here's a link.
and the XML is here...
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process: list the name, role and value type of every attribute.

  1. "Generate Data by User Specification" builds a sample example set whose
     attributes carry several roles (label, id, cluster, prediction, weight,
     batch) next to regular ones.
  2. "Nominal to Text" is applied to the attribute regular_text.
  3. "Execute Script" (Groovy) walks over all attributes of its input and adds
     one row (Name, Role, Type) per attribute to a new example set. It returns
     the untouched input on output 1 and the new example set on output 2.

  Escaping notes: double quotes inside attribute values are written as &quot;
  and the script's line breaks / tabs as the character references &#10; / &#9;.
  A literal line break inside an attribute value is normalised to a space by
  the XML parser, which would let the script's first "//" comment swallow all
  following statements, so the references must be kept.
-->
<process version="7.0.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="generate_data_user_specification" compatibility="7.0.001" expanded="true" height="68" name="Generate Data by User Specification" width="90" x="112" y="34">
        <list key="attribute_values">
          <parameter key="label_real" value="1.1"/>
          <parameter key="regular_real" value="1.1"/>
          <parameter key="regular_integer" value="1"/>
          <parameter key="regular_string" value="&quot;hello&quot;"/>
          <parameter key="regular_boolean" value="true"/>
          <parameter key="regular_date" value="date_now()"/>
          <parameter key="regular_text" value="&quot;world&quot;"/>
          <parameter key="id_integer" value="1"/>
          <parameter key="cluster_integer" value="1"/>
          <parameter key="prediction_integer" value="1"/>
          <parameter key="weight_integer" value="1"/>
          <parameter key="batch_integer" value="1"/>
        </list>
        <list key="set_additional_roles">
          <parameter key="label_real" value="label"/>
          <parameter key="id_integer" value="id"/>
          <parameter key="cluster_integer" value="cluster"/>
          <parameter key="prediction_integer" value="prediction"/>
          <parameter key="weight_integer" value="weight"/>
          <parameter key="batch_integer" value="batch"/>
        </list>
      </operator>
      <operator activated="true" class="nominal_to_text" compatibility="7.0.001" expanded="true" height="82" name="Nominal to Text" width="90" x="313" y="34">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="regular_text"/>
      </operator>
      <operator activated="true" class="execute_script" compatibility="7.0.001" expanded="true" height="103" name="Execute Script" width="90" x="581" y="34">
        <parameter key="script" value="import com.rapidminer.tools.Ontology;&#10;&#10;ExampleSet exampleSet0 = input[0];&#10;Attribute[] attributes = new Attribute[3];&#10;attributes[0] = AttributeFactory.createAttribute(&quot;Name&quot;, Ontology.STRING);&#10;attributes[1] = AttributeFactory.createAttribute(&quot;Role&quot;, Ontology.STRING);&#10;attributes[2] = AttributeFactory.createAttribute(&quot;Type&quot;, Ontology.STRING);&#10;MemoryExampleTable table = new MemoryExampleTable(attributes);&#10;DataRowFactory ROW_FACTORY = new DataRowFactory(0);&#10;Integer numberOfAttributes = exampleSet0.getAttributes().allSize();&#10;String role = &quot;&quot;;&#10;int i = 0;&#10;String[] values = new String[3];&#10;for (Attribute attribute : exampleSet0.getAttributes().allAttributes()) {&#10;&#9;String valueType = attribute.getValueType();&#10;&#9;operator.logNote (&quot;Type: &quot; + valueType);&#10;&#9;String name = attribute.getName();&#10;&#9;AttributeRole currentRole = exampleSet0.getAttributes().getRole(attribute);&#10;&#9;Boolean isSpecial = currentRole.isSpecial();&#10;&#9;if (isSpecial == true) {&#10;&#9;&#9;role = currentRole.getSpecialName();&#10;&#9;}&#10;&#9;else {&#10;&#9;&#9;role = &quot;regular&quot;;&#10;&#9;}&#10;&#9;i = i + 1;&#10;&#9;values[0] = name;&#10;&#9;values[1] = role;&#10;&#9;values[2] = valueType;&#10;&#9;DataRow row = ROW_FACTORY.create(values, attributes);&#10;&#9;table.addDataRow(row);&#10;}&#10;// the first output is the input&#10;// the second output is the meta data for the input&#10;ExampleSet exampleSet = table.createExampleSet();&#10;ExampleSet[] exampleSets = new ExampleSet[2];&#10;exampleSets[0] = exampleSet0;&#10;exampleSets[1] = exampleSet;&#10;return exampleSets;"/>
      </operator>
      <connect from_op="Generate Data by User Specification" from_port="output" to_op="Nominal to Text" to_port="example set input"/>
      <connect from_op="Nominal to Text" from_port="example set output" to_op="Execute Script" to_port="input 1"/>
      <connect from_op="Execute Script" from_port="output 1" to_port="result 1"/>
      <connect from_op="Execute Script" from_port="output 2" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>
3
Answers
-
I created a process, using Groovy, that makes a new example set containing the names and types of an existing example set. Here's a link.
and the XML is here...
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process: list the name, role and value type of every attribute.

  1. "Generate Data by User Specification" builds a sample example set whose
     attributes carry several roles (label, id, cluster, prediction, weight,
     batch) next to regular ones.
  2. "Nominal to Text" is applied to the attribute regular_text.
  3. "Execute Script" (Groovy) walks over all attributes of its input and adds
     one row (Name, Role, Type) per attribute to a new example set. It returns
     the untouched input on output 1 and the new example set on output 2.

  Escaping notes: double quotes inside attribute values are written as &quot;
  and the script's line breaks / tabs as the character references &#10; / &#9;.
  A literal line break inside an attribute value is normalised to a space by
  the XML parser, which would let the script's first "//" comment swallow all
  following statements, so the references must be kept.
-->
<process version="7.0.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="generate_data_user_specification" compatibility="7.0.001" expanded="true" height="68" name="Generate Data by User Specification" width="90" x="112" y="34">
        <list key="attribute_values">
          <parameter key="label_real" value="1.1"/>
          <parameter key="regular_real" value="1.1"/>
          <parameter key="regular_integer" value="1"/>
          <parameter key="regular_string" value="&quot;hello&quot;"/>
          <parameter key="regular_boolean" value="true"/>
          <parameter key="regular_date" value="date_now()"/>
          <parameter key="regular_text" value="&quot;world&quot;"/>
          <parameter key="id_integer" value="1"/>
          <parameter key="cluster_integer" value="1"/>
          <parameter key="prediction_integer" value="1"/>
          <parameter key="weight_integer" value="1"/>
          <parameter key="batch_integer" value="1"/>
        </list>
        <list key="set_additional_roles">
          <parameter key="label_real" value="label"/>
          <parameter key="id_integer" value="id"/>
          <parameter key="cluster_integer" value="cluster"/>
          <parameter key="prediction_integer" value="prediction"/>
          <parameter key="weight_integer" value="weight"/>
          <parameter key="batch_integer" value="batch"/>
        </list>
      </operator>
      <operator activated="true" class="nominal_to_text" compatibility="7.0.001" expanded="true" height="82" name="Nominal to Text" width="90" x="313" y="34">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="regular_text"/>
      </operator>
      <operator activated="true" class="execute_script" compatibility="7.0.001" expanded="true" height="103" name="Execute Script" width="90" x="581" y="34">
        <parameter key="script" value="import com.rapidminer.tools.Ontology;&#10;&#10;ExampleSet exampleSet0 = input[0];&#10;Attribute[] attributes = new Attribute[3];&#10;attributes[0] = AttributeFactory.createAttribute(&quot;Name&quot;, Ontology.STRING);&#10;attributes[1] = AttributeFactory.createAttribute(&quot;Role&quot;, Ontology.STRING);&#10;attributes[2] = AttributeFactory.createAttribute(&quot;Type&quot;, Ontology.STRING);&#10;MemoryExampleTable table = new MemoryExampleTable(attributes);&#10;DataRowFactory ROW_FACTORY = new DataRowFactory(0);&#10;Integer numberOfAttributes = exampleSet0.getAttributes().allSize();&#10;String role = &quot;&quot;;&#10;int i = 0;&#10;String[] values = new String[3];&#10;for (Attribute attribute : exampleSet0.getAttributes().allAttributes()) {&#10;&#9;String valueType = attribute.getValueType();&#10;&#9;operator.logNote (&quot;Type: &quot; + valueType);&#10;&#9;String name = attribute.getName();&#10;&#9;AttributeRole currentRole = exampleSet0.getAttributes().getRole(attribute);&#10;&#9;Boolean isSpecial = currentRole.isSpecial();&#10;&#9;if (isSpecial == true) {&#10;&#9;&#9;role = currentRole.getSpecialName();&#10;&#9;}&#10;&#9;else {&#10;&#9;&#9;role = &quot;regular&quot;;&#10;&#9;}&#10;&#9;i = i + 1;&#10;&#9;values[0] = name;&#10;&#9;values[1] = role;&#10;&#9;values[2] = valueType;&#10;&#9;DataRow row = ROW_FACTORY.create(values, attributes);&#10;&#9;table.addDataRow(row);&#10;}&#10;// the first output is the input&#10;// the second output is the meta data for the input&#10;ExampleSet exampleSet = table.createExampleSet();&#10;ExampleSet[] exampleSets = new ExampleSet[2];&#10;exampleSets[0] = exampleSet0;&#10;exampleSets[1] = exampleSet;&#10;return exampleSets;"/>
      </operator>
      <connect from_op="Generate Data by User Specification" from_port="output" to_op="Nominal to Text" to_port="example set input"/>
      <connect from_op="Nominal to Text" from_port="example set output" to_op="Execute Script" to_port="input 1"/>
      <connect from_op="Execute Script" from_port="output 1" to_port="result 1"/>
      <connect from_op="Execute Script" from_port="output 2" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>
3