splitting names
I want to separate names from a column. For example:
smith john b & mary
should give me
smith john b
smith mary
I am able to split the name, but I am not able to carry the last name ("smith") over to the second person,
so I am getting
smith john b
mary <----- want to see "smith mary"
In Excel I can use the LEFT function up to the first space, but I am not sure how to do this in RapidMiner.
Answers
-
Hi @pascasiw,
Here is a possible solution:
<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner process: turn a combined name such as "smith john b & mary"
  into two attributes, name1 ("smith john b") and name2 ("smith mary").

  Flow, as wired by the connect elements at the bottom:
    Read Excel, then Multiply, which feeds two branches:
      branch 1: Split on whitespace, then Select Attributes (Id, names_1)
      branch 2: Split (2) on the leading token
    Join both branches on Id, then Split (3) on the ampersand,
    then Generate Attributes, then Select Attributes (2).
-->
<process version="9.0.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.0.001" expanded="true" name="Process">
    <process expanded="true">
      <!-- Input workbook: column 0 is read as integer "Id", column 1 as polynominal "names".
           The excel_file path is specific to the original author's machine; point it at your own file. -->
      <operator activated="true" class="read_excel" compatibility="9.0.001" expanded="true" height="68" name="Read Excel" width="90" x="112" y="136">
        <parameter key="excel_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\Split_names\Split_names.xlsx"/>
        <list key="annotations"/>
        <parameter key="date_format" value="MMM d, yyyy h:mm:ss a z"/>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="Id.true.integer.attribute"/>
          <parameter key="1" value="names.true.polynominal.attribute"/>
        </list>
        <parameter key="read_not_matching_values_as_missings" value="false"/>
      </operator>
      <!-- Duplicates the example set so the two branches below can split it differently. -->
      <operator activated="true" class="multiply" compatibility="9.0.001" expanded="true" height="103" name="Multiply" width="90" x="112" y="238"/>
      <!-- Branch 1: split on runs of whitespace; only Id and the first piece (names_1) are kept afterwards.
           NOTE(review): names_1 is presumably the last name ("smith"); confirm on your data. -->
      <operator activated="true" class="split" compatibility="9.0.001" expanded="true" height="82" name="Split" width="90" x="313" y="136">
        <parameter key="split_pattern" value="([\s]+)"/>
      </operator>
      <operator activated="true" class="select_attributes" compatibility="9.0.001" expanded="true" height="82" name="Select Attributes" width="90" x="447" y="136">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="Id|names_1"/>
      </operator>
      <!-- Branch 2: the pattern matches the leading non-whitespace token plus the one whitespace
           character after it, so the text following that token becomes its own piece.
           NOTE(review): that remainder is presumably what arrives as names_2; confirm in the results view. -->
      <operator activated="true" class="split" compatibility="9.0.001" expanded="true" height="82" name="Split (2)" width="90" x="313" y="238">
        <parameter key="split_pattern" value="^[^\s]*\s"/>
      </operator>
      <!-- Recombine the two branches row by row using Id as the join key. -->
      <operator activated="true" class="concurrency:join" compatibility="9.0.001" expanded="true" height="82" name="Join" width="90" x="581" y="187">
        <parameter key="use_id_attribute_as_key" value="false"/>
        <list key="key_attributes">
          <parameter key="Id" value="Id"/>
        </list>
      </operator>
      <!-- Split names_2 at the ampersand into names_2_1 and names_2_2.
           The ampersand must be written as an entity inside XML; the surrounding \s* also consumes
           the blanks around it so the pieces carry no leading or trailing space. -->
      <operator activated="true" class="split" compatibility="9.0.001" expanded="true" height="82" name="Split (3)" width="90" x="715" y="187">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="names_2"/>
        <parameter key="split_pattern" value="\s*&amp;\s*"/>
      </operator>
      <!-- Prefix each piece with names_1. The double quotes of the " " separator are written as
           entities because they sit inside a double-quoted attribute value. -->
      <operator activated="true" class="generate_attributes" compatibility="9.0.001" expanded="true" height="82" name="Generate Attributes" width="90" x="849" y="187">
        <list key="function_descriptions">
          <parameter key="name1" value="concat(names_1,&quot; &quot;,names_2_1)"/>
          <parameter key="name2" value="concat(names_1,&quot; &quot;,names_2_2)"/>
        </list>
      </operator>
      <!-- Inverted regular-expression filter: drops every attribute whose name matches names_.*
           (the intermediate pieces) and keeps the rest. -->
      <operator activated="true" class="select_attributes" compatibility="9.0.001" expanded="true" height="82" name="Select Attributes (2)" width="90" x="983" y="187">
        <parameter key="attribute_filter_type" value="regular_expression"/>
        <parameter key="regular_expression" value="names_.*"/>
        <parameter key="invert_selection" value="true"/>
      </operator>
      <connect from_op="Read Excel" from_port="output" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="Split" to_port="example set input"/>
      <connect from_op="Multiply" from_port="output 2" to_op="Split (2)" to_port="example set input"/>
      <connect from_op="Split" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Join" to_port="left"/>
      <connect from_op="Split (2)" from_port="example set output" to_op="Join" to_port="right"/>
      <connect from_op="Join" from_port="join" to_op="Split (3)" to_port="example set input"/>
      <connect from_op="Split (3)" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_op="Select Attributes (2)" to_port="example set input"/>
      <connect from_op="Select Attributes (2)" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
I hope it helps,
Regards,
Lionel
1 -
Lionel,
Thanks for your reply. I have been testing the solution you provided but still facing some other issues. I will let you know once I have it working.
0 -
I agree with Lionel that the Split operator will allow you to separate all the names out into separate attributes, and then you can combine them using subsequent rules of your choice.
1