Preprocessing ideas to improve Memory/RAM usage efficiency

suleymansahal
suleymansahal New Altair Community Member
edited November 2024 in Community Q&A

Hi,

For a project, I am researching the relation between the financial ratios of firms and their long-term stock returns. I have been trying to compute derived prices of stocks, including the stocks' dividend distributions, in three nested loops. I have a data set covering ten years of data, with around 845,000 rows and 60 columns. In the three nested loops, I first loop through the stock ID values and calculate derived prices. In every iteration I store the resulting small set in order to be able to free memory. Sometimes I saw values of 18-20 percent on the Loop Values operator, yet I could not get the process to finish. I always end up with a frozen RM. My PC has 16 GB of RAM and a 128 GB SSD with some pagefile. Below is the XML for this process.

How could I improve this process, or how should I change my perspective? Thanks in advance.

<?xml version="1.0" encoding="UTF-8"?>
<process version="8.1.003">
  <!-- RapidMiner process: for every PERMNO value, builds a "Derived Price" series as the
       running product of (Returns + 1.00) and stores one repository entry per PERMNO. -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="8.1.003" expanded="true" name="Process">
    <process expanded="true">
      <!-- Step 1: load the monthly stock data set from the repository. -->
      <operator activated="true" class="retrieve" compatibility="8.1.003" expanded="true" height="68" name="Retrieve monthly_stock_2008_2017_permno" width="90" x="45" y="34">
        <parameter key="repository_entry" value="../01 Data/monthly_stock_2008_2017_permno"/>
      </operator>
      <!-- Step 2: convert the price date to a month number (old attribute kept), sort, and add an id. -->
      <operator activated="true" class="subprocess" compatibility="8.1.003" expanded="true" height="82" name="ID &amp; Sort" width="90" x="179" y="34">
        <process expanded="true">
          <operator activated="true" class="date_to_numerical" compatibility="8.1.003" expanded="true" height="82" name="Date to Numerical" width="90" x="45" y="34">
            <parameter key="attribute_name" value="Alternate Price Date"/>
            <parameter key="time_unit" value="month"/>
            <parameter key="hour_relative_to" value="epoch"/>
            <parameter key="day_relative_to" value="epoch"/>
            <parameter key="month_relative_to" value="epoch"/>
            <parameter key="keep_old_attribute" value="true"/>
          </operator>
          <!-- Three consecutive sorts: Payment Date, then Alternate Price Date, then PERMNO.
               NOTE(review): presumably relies on the sort being stable so that PERMNO ends up as the
               primary order and the dates as tie breakers; confirm. -->
          <operator activated="true" class="sort" compatibility="8.1.003" expanded="true" height="82" name="Sort" width="90" x="180" y="34">
            <parameter key="attribute_name" value="Payment Date"/>
          </operator>
          <operator activated="true" class="sort" compatibility="8.1.003" expanded="true" height="82" name="Sort (2)" width="90" x="313" y="34">
            <parameter key="attribute_name" value="Alternate Price Date"/>
          </operator>
          <operator activated="true" class="sort" compatibility="8.1.003" expanded="true" height="82" name="Sort (3)" width="90" x="447" y="34">
            <parameter key="attribute_name" value="PERMNO"/>
          </operator>
          <operator activated="true" class="generate_id" compatibility="8.1.003" expanded="true" height="82" name="Generate ID" width="90" x="581" y="34"/>
          <connect from_port="in 1" to_op="Date to Numerical" to_port="example set input"/>
          <connect from_op="Date to Numerical" from_port="example set output" to_op="Sort" to_port="example set input"/>
          <connect from_op="Sort" from_port="example set output" to_op="Sort (2)" to_port="example set input"/>
          <connect from_op="Sort (2)" from_port="example set output" to_op="Sort (3)" to_port="example set input"/>
          <connect from_op="Sort (3)" from_port="example set output" to_op="Generate ID" to_port="example set input"/>
          <connect from_op="Generate ID" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- Step 3: add "Return Factor" (Returns+1.00) and a "Derived Price" column initialised to 1.00. -->
      <operator activated="true" class="generate_attributes" compatibility="8.1.003" expanded="true" height="82" name="Generate Attributes" width="90" x="313" y="85">
        <list key="function_descriptions">
          <parameter key="Return Factor" value="Returns+1.00"/>
          <parameter key="Derived Price" value="1.00"/>
        </list>
      </operator>
      <!-- Step 4: convert PERMNO to polynominal before it is used as the Loop Values attribute. -->
      <operator activated="true" class="numerical_to_polynominal" compatibility="8.1.003" expanded="true" height="82" name="Numerical to Polynominal" width="90" x="447" y="34">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="PERMNO"/>
      </operator>
      <!-- Step 5: iterate over the PERMNO values with parallel execution disabled.
           Input 1 is the converted set; input 2 is the "original" output of Generate Attributes. -->
      <operator activated="true" class="concurrency:loop_values" compatibility="8.1.003" expanded="true" height="103" name="Loop Values" width="90" x="581" y="85">
        <parameter key="attribute" value="PERMNO"/>
        <parameter key="enable_parallel_execution" value="false"/>
        <process expanded="true">
          <!-- Keep only the rows of the current PERMNO whose Returns value is present. -->
          <operator activated="true" class="filter_examples" compatibility="8.1.003" expanded="true" height="103" name="Filter permno &amp; nonmissing" width="90" x="45" y="34">
            <list key="filters_list">
              <parameter key="filters_entry_key" value="PERMNO.equals.%{loop_value}"/>
              <parameter key="filters_entry_key" value="Returns.is_not_missing."/>
            </list>
          </operator>
          <!-- Visit every example; the example index is exposed as macro loop_example_outer. -->
          <operator activated="true" class="loop_examples" compatibility="8.1.003" expanded="true" height="82" name="Loop Examples" width="90" x="179" y="34">
            <parameter key="iteration_macro" value="loop_example_outer"/>
            <process expanded="true">
              <!-- Reset the running product kumulatif_fiyat to 1.00 for the current example. -->
              <operator activated="true" class="generate_macro" compatibility="8.1.003" expanded="true" height="82" name="Generate Macro (3)" width="90" x="45" y="34">
                <list key="function_descriptions">
                  <parameter key="kumulatif_fiyat" value="1.00"/>
                </list>
              </operator>
              <!-- Inner loop with loop_example_outer iterations: multiplies the running product by one
                   Return Factor per iteration.
                   NOTE(review): because the product is reset above and rebuilt here for every example,
                   the number of multiplications appears to grow quadratically with the rows per PERMNO. -->
              <operator activated="true" class="concurrency:loop" compatibility="8.1.003" expanded="true" height="82" name="Loop" width="90" x="179" y="34">
                <parameter key="number_of_iterations" value="%{loop_example_outer}"/>
                <parameter key="reuse_results" value="true"/>
                <process expanded="true">
                  <!-- Read Return Factor of the example at index %{iteration} into macro tekil_fiyat
                       (presumably %{iteration} is this inner loop's counter; confirm). -->
                  <operator activated="true" class="extract_macro" compatibility="8.1.003" expanded="true" height="68" name="Extract Macro" width="90" x="179" y="34">
                    <parameter key="macro" value="tekil_fiyat"/>
                    <parameter key="macro_type" value="data_value"/>
                    <parameter key="attribute_name" value="Return Factor"/>
                    <parameter key="example_index" value="%{iteration}"/>
                    <list key="additional_macros"/>
                  </operator>
                  <!-- kumulatif_fiyat = kumulatif_fiyat * tekil_fiyat -->
                  <operator activated="true" class="generate_macro" compatibility="8.1.003" expanded="true" height="82" name="Generate Macro (2)" width="90" x="313" y="34">
                    <list key="function_descriptions">
                      <parameter key="kumulatif_fiyat" value="eval(%{kumulatif_fiyat})*eval(%{tekil_fiyat})"/>
                    </list>
                  </operator>
                  <connect from_port="input 1" to_op="Extract Macro" to_port="example set"/>
                  <connect from_op="Extract Macro" from_port="example set" to_op="Generate Macro (2)" to_port="through 1"/>
                  <connect from_op="Generate Macro (2)" from_port="through 1" to_port="output 1"/>
                  <portSpacing port="source_input 1" spacing="0"/>
                  <portSpacing port="source_input 2" spacing="0"/>
                  <portSpacing port="sink_output 1" spacing="0"/>
                  <portSpacing port="sink_output 2" spacing="0"/>
                </process>
              </operator>
              <!-- Write the finished product into Derived Price of the current example. -->
              <operator activated="true" class="set_data" compatibility="8.1.003" expanded="true" height="82" name="Set Data" width="90" x="313" y="34">
                <parameter key="example_index" value="%{loop_example_outer}"/>
                <parameter key="attribute_name" value="Derived Price"/>
                <parameter key="value" value="%{kumulatif_fiyat}"/>
                <list key="additional_values"/>
              </operator>
              <connect from_port="example set" to_op="Generate Macro (3)" to_port="through 1"/>
              <connect from_op="Generate Macro (3)" from_port="through 1" to_op="Loop" to_port="input 1"/>
              <connect from_op="Loop" from_port="output 1" to_op="Set Data" to_port="example set input"/>
              <connect from_op="Set Data" from_port="example set output" to_port="example set"/>
              <portSpacing port="source_example set" spacing="0"/>
              <portSpacing port="sink_example set" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
            </process>
          </operator>
          <!-- NOTE(review): only "example set 1" of this Append is connected (from Loop Examples),
               so it looks redundant; confirm before removing. -->
          <operator activated="true" class="append" compatibility="8.1.003" expanded="true" height="82" name="Append Prices" width="90" x="313" y="34"/>
          <!-- Rows of the current PERMNO taken from input 2, without the non-missing Returns filter. -->
          <operator activated="true" class="filter_examples" compatibility="8.1.003" expanded="true" height="103" name="Filter permno" width="90" x="45" y="187">
            <list key="filters_list">
              <parameter key="filters_entry_key" value="PERMNO.eq.%{loop_value}"/>
            </list>
          </operator>
          <!-- Left join of the unfiltered rows (left) with the priced rows (right). No key attributes
               are listed; presumably the id created by Generate ID is used as the key; confirm. -->
          <operator activated="true" class="concurrency:join" compatibility="8.1.003" expanded="true" height="82" name="Join" width="90" x="447" y="136">
            <parameter key="join_type" value="left"/>
            <list key="key_attributes"/>
          </operator>
          <!-- Store the joined result under a repository entry named after the current PERMNO. -->
          <operator activated="true" class="store" compatibility="8.1.003" expanded="true" height="68" name="Store" width="90" x="581" y="136">
            <parameter key="repository_entry" value="//StockInvestment/01 Data/monthly stock derived prices/%{loop_value}"/>
          </operator>
          <operator activated="true" class="free_memory" compatibility="8.1.003" expanded="true" height="82" name="Free Memory" width="90" x="715" y="136"/>
          <!-- The remaining operators in this subprocess all carry activated="false":
               Handle Exception, Append Stocks, Remove Duplicates, Store Derived Price File,
               Delete Repository Entry and Free Memory (2). -->
          <operator activated="false" class="handle_exception" compatibility="8.1.003" expanded="true" height="82" name="Handle Exception" width="90" x="246" y="340">
            <process expanded="true">
              <operator activated="true" class="retrieve" compatibility="8.1.003" expanded="true" height="68" name="Retrieve Derived Price File" width="90" x="112" y="34">
                <parameter key="repository_entry" value="//StockInvestment/01 Data/monthly_stock_2008_2017_permno_derived_prices"/>
              </operator>
              <connect from_op="Retrieve Derived Price File" from_port="output" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
            <process expanded="true">
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
          </operator>
          <operator activated="false" class="append" compatibility="8.1.003" expanded="true" height="82" name="Append Stocks" width="90" x="380" y="442"/>
          <operator activated="false" class="remove_duplicates" compatibility="8.1.003" expanded="true" height="103" name="Remove Duplicates" width="90" x="514" y="442">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="id"/>
            <parameter key="include_special_attributes" value="true"/>
          </operator>
          <operator activated="false" class="store" compatibility="8.1.003" expanded="true" height="68" name="Store Derived Price File" width="90" x="648" y="442">
            <parameter key="repository_entry" value="//StockInvestment/01 Data/monthly_stock_2008_2017_permno_derived_prices"/>
          </operator>
          <operator activated="false" breakpoints="before,after" class="delete_repository_entry" compatibility="8.1.003" expanded="true" height="68" name="Delete Repository Entry" width="90" x="782" y="442">
            <parameter key="entry_to_delete" value="//StockInvestment/01 Data/monthly stock derived prices/%{loop_value}"/>
            <description align="center" color="transparent" colored="false" width="126">to be activated if necessary</description>
          </operator>
          <operator activated="false" class="free_memory" compatibility="8.1.003" expanded="true" height="82" name="Free Memory (2)" width="90" x="916" y="442"/>
          <connect from_port="input 1" to_op="Filter permno &amp; nonmissing" to_port="example set input"/>
          <connect from_port="input 2" to_op="Filter permno" to_port="example set input"/>
          <connect from_op="Filter permno &amp; nonmissing" from_port="example set output" to_op="Loop Examples" to_port="example set"/>
          <connect from_op="Loop Examples" from_port="example set" to_op="Append Prices" to_port="example set 1"/>
          <connect from_op="Append Prices" from_port="merged set" to_op="Join" to_port="right"/>
          <connect from_op="Filter permno" from_port="example set output" to_op="Join" to_port="left"/>
          <connect from_op="Join" from_port="join" to_op="Store" to_port="input"/>
          <connect from_op="Store" from_port="through" to_op="Free Memory" to_port="through 1"/>
          <connect from_op="Free Memory" from_port="through 1" to_port="output 1"/>
          <connect from_op="Handle Exception" from_port="out 1" to_op="Append Stocks" to_port="example set 1"/>
          <connect from_op="Append Stocks" from_port="merged set" to_op="Remove Duplicates" to_port="example set input"/>
          <connect from_op="Remove Duplicates" from_port="example set output" to_op="Store Derived Price File" to_port="input"/>
          <connect from_op="Store Derived Price File" from_port="through" to_op="Free Memory (2)" to_port="through 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="105"/>
          <portSpacing port="source_input 3" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <!-- Step 6: merge the per-PERMNO outputs of Loop Values into the process result. -->
      <operator activated="true" class="append" compatibility="8.1.003" expanded="true" height="82" name="Append Final" width="90" x="715" y="85"/>
      <connect from_op="Retrieve monthly_stock_2008_2017_permno" from_port="output" to_op="ID &amp; Sort" to_port="in 1"/>
      <connect from_op="ID &amp; Sort" from_port="out 1" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_op="Numerical to Polynominal" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="original" to_op="Loop Values" to_port="input 2"/>
      <connect from_op="Numerical to Polynominal" from_port="example set output" to_op="Loop Values" to_port="input 1"/>
      <connect from_op="Loop Values" from_port="output 1" to_op="Append Final" to_port="example set 1"/>
      <connect from_op="Append Final" from_port="merged set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
Tagged:

Best Answer

Answers

  • suleymansahal
    suleymansahal New Altair Community Member

    Thanks for the reply. The process was indeed heavy for my PC. The second-level inner loop operator contained an Append operator that was not really necessary; I only wanted to see the interim result. When I removed it, I was finally able to finish the whole process. So I learned that Append is a really resource-demanding operator. Thank you again.