Parallelize nested process & Parallelize main process have no impact
nurman
New Altair Community Member
I have tried running my process with and without the "parallelize nested process" and "parallelize main process" options, but they have NO impact on performance on my 4-core processor. I have tested on both Ubuntu and Windows machines, but there is no difference in performance.
Can you please help?
I have attached my process below.
Would really appreciate your assistance. Thank you very much.
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
Process definition (version 5.1.014).
Overview of what the XML below wires together:
  * The main process contains a single "Loop Files (2)" operator that iterates over
    the files in a directory whose names match the regular expression .*\.csv.
  * For every file the nested process reads the CSV, sorts it, splits it 0.99 / 0.01
    with linear sampling, windows both partitions, runs a sliding window validation
    with an SVM learner on the large partition, and applies the resulting model to
    the small partition.
  * Both parallelization switches (parallelize_main_process on the root operator and
    parallelize_nested_process on the loop) are set to true.
-->
<process version="5.1.014">
<context>
<input/>
<output/>
<macros/>
</context>
<!-- Root operator of the process; parallelization of the main process is switched on here. -->
<operator activated="true" class="process" compatibility="5.1.014" expanded="true" name="Process">
<parameter key="parallelize_main_process" value="true"/>
<process expanded="true" height="350" width="614">
<!-- Iterates over the files in "directory" that match "filter"; the nested process below runs per file. -->
<operator activated="true" class="loop_files" compatibility="5.1.014" expanded="true" height="130" name="Loop Files (2)" width="90" x="179" y="75">
<parameter key="directory" value="D:\documents\VaR\test"/>
<parameter key="filter" value=".*\.csv"/>
<parameter key="parallelize_nested_process" value="true"/>
<process expanded="true" height="476" width="505">
<!--
Reads the current file. The "file" input port is wired to the loop's "file object"
port (see the connect elements near the end of this nested process).
NOTE(review): csv_file still holds a fixed path; presumably the wired file port takes
precedence over it. Confirm.
-->
<operator activated="true" class="read_csv" compatibility="5.1.014" expanded="true" height="60" name="Read CSV (2)" width="90" x="45" y="30">
<parameter key="csv_file" value="C:\Users\mhelf\tmp\files\file1.txt"/>
<parameter key="column_separators" value=","/>
<list key="annotations"/>
<parameter key="encoding" value="windows-1252"/>
<!-- One entry per column, keyed by column index. NOTE(review): the value looks like name.selected.type.role; confirm the format. -->
<list key="data_set_meta_data_information">
<parameter key="0" value="Date.true.nominal.id"/>
<parameter key="1" value="Open.true.real.attribute"/>
<parameter key="2" value="High.true.real.attribute"/>
<parameter key="3" value="Low.true.real.attribute"/>
<parameter key="4" value="Close.true.real.attribute"/>
<parameter key="5" value="Change.true.real.attribute"/>
<parameter key="6" value="Change Percent.true.real.attribute"/>
<parameter key="7" value="Volume.true.integer.attribute"/>
</list>
</operator>
<!-- Sorts by Close. Its output feeds straight into "Sort" (by Date). NOTE(review): this first sort may be redundant; confirm. -->
<operator activated="true" class="sort" compatibility="5.1.014" expanded="true" height="76" name="Sort (2)" width="90" x="45" y="120">
<parameter key="attribute_name" value="Close"/>
</operator>
<!-- Sorts by Date before the data is split. -->
<operator activated="true" class="sort" compatibility="5.1.014" expanded="true" height="76" name="Sort" width="90" x="45" y="255">
<parameter key="attribute_name" value="Date"/>
</operator>
<!-- Splits into two partitions (ratios 0.99 and 0.01) using linear sampling. Partition 1 goes to "Sort (3)", partition 2 to "Windowing (2)". -->
<operator activated="true" class="split_data" compatibility="5.1.014" expanded="true" height="94" name="Split Data" width="90" x="179" y="255">
<enumeration key="partitions">
<parameter key="ratio" value="0.99"/>
<parameter key="ratio" value="0.01"/>
</enumeration>
<parameter key="sampling_type" value="linear sampling"/>
</operator>
<!-- Windows partition 2 (window size 1, label attribute Close); the result is the unlabelled input of "Apply Model (2)". -->
<operator activated="true" class="series:windowing" compatibility="5.1.002" expanded="true" height="76" name="Windowing (2)" width="90" x="179" y="390">
<parameter key="window_size" value="1"/>
<parameter key="label_attribute" value="Close"/>
</operator>
<!-- Sorts partition 1 by Date before it is windowed. -->
<operator activated="true" class="sort" compatibility="5.1.014" expanded="true" height="76" name="Sort (3)" width="90" x="246" y="165">
<parameter key="attribute_name" value="Date"/>
</operator>
<!--
Windows partition 1 (window size 1, horizon 1) and creates a label from Open.
NOTE(review): "Windowing (2)" uses label attribute Close while this operator uses Open;
confirm that the difference is intended.
-->
<operator activated="true" class="series:windowing" compatibility="5.1.002" expanded="true" height="76" name="Windowing" width="90" x="189" y="32">
<parameter key="horizon" value="1"/>
<parameter key="window_size" value="1"/>
<parameter key="create_label" value="true"/>
<parameter key="label_attribute" value="Open"/>
</operator>
<!-- Sliding window validation over the windowed partition 1. First subprocess: training. Second subprocess: testing. -->
<operator activated="true" class="series:sliding_window_validation" compatibility="5.1.002" expanded="true" height="112" name="Validation" width="90" x="313" y="30">
<parameter key="training_window_width" value="20"/>
<parameter key="training_window_step_size" value="5"/>
<parameter key="test_window_width" value="20"/>
<parameter key="horizon" value="5"/>
<!-- Training subprocess: the training data goes into the SVM and the SVM model is returned. -->
<process expanded="true" height="408" width="309">
<operator activated="true" class="support_vector_machine" compatibility="5.1.014" expanded="true" height="112" name="SVM" width="90" x="114" y="30">
<parameter key="convergence_epsilon" value="0.001"/>
</operator>
<connect from_port="training" to_op="SVM" to_port="training set"/>
<connect from_op="SVM" from_port="model" to_port="model"/>
<portSpacing port="source_training" spacing="0"/>
<portSpacing port="sink_model" spacing="0"/>
<portSpacing port="sink_through 1" spacing="0"/>
</process>
<!-- Testing subprocess: applies the model to the test set and passes the labelled data to the forecasting performance operator. -->
<process expanded="true" height="408" width="309">
<operator activated="true" class="apply_model" compatibility="5.1.014" expanded="true" height="76" name="Apply Model" width="90" x="45" y="30">
<list key="application_parameters"/>
</operator>
<operator activated="true" class="series:forecasting_performance" compatibility="5.1.002" expanded="true" height="76" name="Performance" width="90" x="181" y="30">
<parameter key="horizon" value="1"/>
</operator>
<connect from_port="model" to_op="Apply Model" to_port="model"/>
<connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
<connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
<connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
<portSpacing port="source_model" spacing="0"/>
<portSpacing port="source_test set" spacing="0"/>
<portSpacing port="source_through 1" spacing="0"/>
<portSpacing port="sink_averagable 1" spacing="0"/>
<portSpacing port="sink_averagable 2" spacing="0"/>
</process>
</operator>
<!-- Applies the model delivered by "Validation" to the windowed partition 2. -->
<operator activated="true" class="apply_model" compatibility="5.1.014" expanded="true" height="76" name="Apply Model (2)" width="90" x="380" y="210">
<list key="application_parameters"/>
</operator>
<!--
Wiring of the nested process. Two paths leave "Split Data":
  partition 1: Sort (3), Windowing, Validation
  partition 2: Windowing (2)
Both paths meet again at "Apply Model (2)".
NOTE(review): nothing is connected to the sink port "out 1" here, although the main
process connects "out 1" of the loop to "result 1"; confirm whether that is intended.
-->
<connect from_port="file object" to_op="Read CSV (2)" to_port="file"/>
<connect from_op="Read CSV (2)" from_port="output" to_op="Sort (2)" to_port="example set input"/>
<connect from_op="Sort (2)" from_port="example set output" to_op="Sort" to_port="example set input"/>
<connect from_op="Sort" from_port="example set output" to_op="Split Data" to_port="example set"/>
<connect from_op="Split Data" from_port="partition 1" to_op="Sort (3)" to_port="example set input"/>
<connect from_op="Split Data" from_port="partition 2" to_op="Windowing (2)" to_port="example set input"/>
<connect from_op="Windowing (2)" from_port="example set output" to_op="Apply Model (2)" to_port="unlabelled data"/>
<connect from_op="Sort (3)" from_port="example set output" to_op="Windowing" to_port="example set input"/>
<connect from_op="Windowing" from_port="example set output" to_op="Validation" to_port="training"/>
<connect from_op="Validation" from_port="model" to_op="Apply Model (2)" to_port="model"/>
<connect from_op="Validation" from_port="training" to_port="out 2"/>
<connect from_op="Validation" from_port="averagable 1" to_port="out 3"/>
<connect from_op="Apply Model (2)" from_port="labelled data" to_port="out 4"/>
<portSpacing port="source_file object" spacing="0"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
<portSpacing port="sink_out 3" spacing="0"/>
<portSpacing port="sink_out 4" spacing="0"/>
<portSpacing port="sink_out 5" spacing="0"/>
</process>
</operator>
<!-- Main process wiring: the four loop outputs are delivered as process results 1 to 4. -->
<connect from_op="Loop Files (2)" from_port="out 1" to_port="result 1"/>
<connect from_op="Loop Files (2)" from_port="out 2" to_port="result 2"/>
<connect from_op="Loop Files (2)" from_port="out 3" to_port="result 3"/>
<connect from_op="Loop Files (2)" from_port="out 4" to_port="result 4"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
<portSpacing port="sink_result 4" spacing="0"/>
<portSpacing port="sink_result 5" spacing="0"/>
</process>
</operator>
</process>
Tagged:
0
Answers
-
Hi,
If "parallelize process" is activated, independent paths inside the process are executed in parallel. So in your example process {Windowing (2)} is executed in parallel to {Sort (3), Windowing, Validation}. Since Windowing (2) is probably quite fast, you don't notice any performance improvements from this.
Best, Marius0 -
Dear Marius,
I hope you can help me here. All I want is to run all the processes in the Loop Files operator with different input datasets (different CSV files) in parallel, leveraging multiple cores. How can I achieve this? Could you please fix my workflow and show how this works?
Would really appreciate your help.
Thank you very much.
regards,
nurman0 -
There is no parallelized Loop Files operator. But you could install the Parallel Extension and use the X-Validation (Parallel) inside your loop. The validation is probably the most time-consuming step in your process, so you would gain a lot from parallelizing it. Be warned though that the Parallel extension sometimes still has some hiccups.0