Why does a process run smoothly in Studio but throw a datatype error on AI Hub?
phaupts
New Altair Community Member
My process runs in AI Studio on my local machine without any errors. When I run the same process on our AI Hub, I get the following error message:

Ooops. Seems like you have found a bug. Please report it in our community at https://community.rapidminer.com. Reason: Script terminated abnormally: TypeError: int() argument must be a string, a bytes-like object or a real number, not 'Timestamp'

The data loaded from the database contains columns with datetime values, but no part of the code does anything with those values, and the code itself should be fine because it runs smoothly in my local Studio. What could be the reason for the error message, and how can I solve it? The process XML is below, followed by a minimal sketch of where such a TypeError can come from.
<?xml version="1.0" encoding="UTF-8"?><process version="10.4.001">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="10.4.001" expanded="true" name="Process">
<parameter key="logverbosity" value="init"/>
<parameter key="random_seed" value="2001"/>
<parameter key="send_mail" value="never"/>
<parameter key="notification_email" value=""/>
<parameter key="process_duration_for_mail" value="30"/>
<parameter key="encoding" value="SYSTEM"/>
<process expanded="true">
<operator activated="true" class="retrieve" compatibility="10.4.001" expanded="true" height="68" name="Retrieve FE_Koelsch" width="90" x="45" y="238">
<parameter key="repository_entry" value="/Connections/SCGSRVSQL16-FE_Ergebnis_Koelsch"/>
</operator>
<operator activated="true" class="jdbc_connectors:read_database" compatibility="10.4.001" expanded="true" height="82" name="Read JsonTable" width="90" x="179" y="136">
<parameter key="define_connection" value="repository"/>
<parameter key="connection_entry" value="//SCG_Test_Rapidminer/Connections/FE_Koelsch"/>
<parameter key="database_system" value="MySQL"/>
<parameter key="define_query" value="query"/>
<parameter key="query" value="SELECT * FROM "dbo"."JsonTable" ORDER BY "jid" ASC OFFSET (SELECT COUNT(*) FROM "dbo"."JsonTable") - 100 ROWS FETCH NEXT 100 ROWS ONLY;"/>
<parameter key="use_default_schema" value="true"/>
<parameter key="prepare_statement" value="false"/>
<enumeration key="parameters"/>
<parameter key="data_management" value="auto"/>
</operator>
<operator activated="true" class="jdbc_connectors:read_database" compatibility="10.4.001" expanded="true" height="82" name="Read ResultsTable" width="90" x="179" y="238">
<parameter key="define_connection" value="repository"/>
<parameter key="connection_entry" value="//SCG_Test_Rapidminer/Connections/FE_Koelsch"/>
<parameter key="database_system" value="MySQL"/>
<parameter key="define_query" value="query"/>
<parameter key="query" value="SELECT * FROM "dbo"."ResultsTable""/>
<parameter key="use_default_schema" value="true"/>
<parameter key="prepare_statement" value="false"/>
<enumeration key="parameters"/>
<parameter key="data_management" value="auto"/>
</operator>
<operator activated="false" class="filter_example_range" compatibility="10.4.001" expanded="true" height="82" name="Filter Example Range" width="90" x="313" y="136">
<parameter key="first_example" value="20000"/>
<parameter key="last_example" value="20100"/>
<parameter key="invert_filter" value="false"/>
</operator>
<operator activated="true" class="concurrency:join" compatibility="10.4.001" expanded="true" height="82" name="Join" width="90" x="447" y="238">
<parameter key="remove_double_attributes" value="true"/>
<parameter key="join_type" value="inner"/>
<parameter key="use_id_attribute_as_key" value="false"/>
<list key="key_attributes">
<parameter key="rID" value="rid"/>
</list>
<parameter key="keep_both_join_attributes" value="false"/>
</operator>
<operator activated="true" class="python_scripting:execute_python" compatibility="10.1.001" expanded="true" height="103" name="Analyze Features" width="90" x="581" y="238">
<parameter key="script" value="import pandas as pd import json def json_conv(element): features_string = element['Features'] tube_length = element['PipeLength'] # Check if 'Features' is empty, None, or the JSON equivalent of None if not features_string or features_string == "null": return [(0, tube_length)] features_json = json.loads(features_string) # Check if features_json is None after loading if features_json is None: return [(0, tube_length)] # Sort features_json by 'Xpos' features_json.sort(key=lambda x: x['Xpos']) # List to store segments without defects no_defect_segments = [] # Starting point of the tube start_point = 0 for feature in features_json: defect_start = feature['Xpos'] - feature['Width'] / 2 defect_end = feature['Xpos'] + feature['Width'] / 2 # Check if segment before this defect is longer than 100mm if defect_start - start_point >= 100: no_defect_segments.append((start_point, defect_start)) # Update the start point for the next segment start_point = defect_end # Check for the last segment of the tube if tube_length - start_point >= 100: no_defect_segments.append((start_point, tube_length)) return no_defect_segments def rm_main(data): datanew = data datanew['No_defect_segments'] = datanew.apply(json_conv, axis=1) return (data, datanew)"/>
<parameter key="notebook_cell_tag_filter" value=""/>
<parameter key="use_default_python" value="true"/>
<parameter key="package_manager" value="conda (anaconda)"/>
<parameter key="conda_environment" value="anaconda3"/>
<parameter key="use_macros" value="false"/>
</operator>
<operator activated="true" class="blending:select_attributes" compatibility="10.4.001" expanded="true" height="82" name="Select Attributes" width="90" x="715" y="238">
<parameter key="type" value="exclude attributes"/>
<parameter key="attribute_filter_type" value="a subset"/>
<parameter key="select_attribute" value=""/>
<parameter key="select_subset" value="Features␞ClassifyData"/>
<parameter key="also_apply_to_special_attributes_(id,_label..)" value="false"/>
</operator>
<operator activated="true" class="store" compatibility="10.4.001" expanded="true" height="68" name="Store" width="90" x="1184" y="238">
<parameter key="repository_entry" value="../Data/DP_Outside_Segments_wo_Defects"/>
</operator>
<connect from_op="Retrieve FE_Koelsch" from_port="output" to_op="Read JsonTable" to_port="connection"/>
<connect from_op="Read JsonTable" from_port="output" to_op="Join" to_port="left"/>
<connect from_op="Read JsonTable" from_port="connection" to_op="Read ResultsTable" to_port="connection"/>
<connect from_op="Read ResultsTable" from_port="output" to_op="Join" to_port="right"/>
<connect from_op="Join" from_port="join" to_op="Analyze Features" to_port="input 1"/>
<connect from_op="Analyze Features" from_port="output 1" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Store" to_port="input"/>
<connect from_op="Store" from_port="through" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>
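For reference, the message looks like what Python raises when int() is applied to a pandas Timestamp, which would fit a datetime column being converted somewhere during the data transfer to the script. A minimal sketch that produces the same message (not part of my process, just to illustrate where it can originate; the value is made up):

import pandas as pd

ts = pd.Timestamp("2024-01-15 08:30:00")  # arbitrary example value

# Raises: TypeError: int() argument must be a string, a bytes-like object
# or a real number, not 'Timestamp'
int(ts)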
Answers
Hi,
this indeed looks odd to me. Can you check what happens if you remove the timestamp attributes (for example with a Select Attributes operator) before Execute Python?
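If removing them in the process is not practical, a rough, untested sketch of the same idea inside the script: convert every datetime column to a plain string at the start of rm_main, so no Timestamp objects are left in the frame that goes back to RapidMiner (the dtype selection below is an assumption, not checked against your data):

import pandas as pd

def rm_main(data):
    # Untested sketch: turn every datetime column into an ISO-formatted string
    # so no Timestamp objects remain in the data handed back to RapidMiner.
    datetime_cols = data.select_dtypes(include=["datetime", "datetimetz"]).columns
    for col in datetime_cols:
        data[col] = data[col].dt.strftime("%Y-%m-%d %H:%M:%S")
    # Alternatively, drop them entirely: data = data.drop(columns=datetime_cols)
    return data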
BR,
Martin