Execute Python breaks column if text has commas


Updated by Jocelyn
Hi, I need some help. I'm doing some crawling with Python (I already tried with RM, but I didn't get what I wanted in an easy way).
The last column of the DF returns a big chunk of text that describes the product. For some reason, when Execute Python creates the data set, it creates new lines and erases the data that was sent in the DF. I tried writing the info to a file from inside Execute Python, and the outcome is a file with 1 row and 5 columns, as expected.
Here is the process I'm using.
The last column of the DF returns a big chunk of text that describes the product. For some reason, when Execute Python creates the data set, it creates new lines and erases the data that was sent in the DF. I tried writing the info to a file from inside Execute Python, and the outcome is a file with 1 row and 5 columns, as expected.
Here is the process I'm using.
<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner process (version 9.1.000).

  Flow: Execute Python, then Generate Attributes, then Date to Nominal, then result 1.
    * The "url" macro holds the product page address; the script reads it
      through the %{url} placeholder.
    * Execute Python builds a pandas DataFrame with the columns
      id, precio_n, precio_d, nombre, descripcion and appends one row
      taken from the fetched page.
    * Generate Attributes adds "Fecha" = date_now() and keeps all attributes.
    * Date to Nominal converts "Fecha" using the pattern yyyy/MM/dd hh:mm:ss.

  Escaping notes: the ampersand in the macro URL is written as an entity,
  double quotes inside the script attribute are written as quote entities,
  and script line breaks are written as numeric character references
  because literal newlines inside an attribute value are turned into
  spaces by the XML parser.

  NOTE(review): the second descripcion.replace call in the script has an
  empty search string as shown here, which leaves the text unchanged. It
  may originally have targeted a newline escape that was lost when the
  process was pasted; confirm against the author's copy.
  NOTE(review): the "tipo" lookup passes _class rather than the class_
  keyword BeautifulSoup documents for CSS class filtering, and its result
  is never used; confirm whether it is needed.
-->
<process version="9.1.000">
  <context>
    <input/>
    <output/>
    <macros>
      <macro>
        <key>url</key>
        <value>https://www.liverpool.com.mx/tienda/pdp/consola-playstation-4-pro-1-tb/1059665339?s=play+station&amp;skuId=1059665339</value>
      </macro>
    </macros>
  </context>
  <operator activated="true" class="process" compatibility="9.1.000" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <operator activated="true" class="python_scripting:execute_python" compatibility="9.1.000" expanded="true" height="82" name="Execute Python" width="90" x="179" y="34">
        <parameter key="script" value="import requests&#10;from bs4 import BeautifulSoup&#10;import pandas as pd&#10;&#10;def rm_main():&#10;    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'}&#10;    columnas=['id','precio_n','precio_d','nombre','descripcion']&#10;    productos=pd.DataFrame(columns=columnas)&#10;    session = requests.Session()&#10;    url='%{url}'&#10;    session.post(url,headers=headers)&#10;    content=session.get(url)&#10;    soup = BeautifulSoup(content.text,'html.parser')&#10;    precio_normal=soup.find(&quot;input&quot;,id=&quot;listPrice&quot;)&#10;    tipo=soup.find(&quot;a&quot;,_class=&quot;actual&quot;)&#10;    llave=soup.find(&quot;input&quot;,id=&quot;productId&quot;)&#10;    #productId&#10;    #gtmPrice&#10;    #productDisplayName&#10;    precio_descuento=soup.find(&quot;input&quot;,id=&quot;gtmPrice&quot;)&#10;    producto=soup.find(&quot;input&quot;,id=&quot;productDisplayName&quot;)&#10;    descripcion=soup.find(&quot;div&quot;,id=&quot;intro&quot;).find('p').get_text()&#10;    descripcion=descripcion.replace(',', '')&#10;    descripcion=descripcion.replace('', '')&#10;    #print(descripcion)&#10;    fila=[llave['value'], precio_normal['value'], precio_descuento['value'], producto['value'], descripcion ]&#10;    productos.loc[len(productos)]=fila&#10;    return productos"/>
        <parameter key="use_default_python" value="true"/>
        <parameter key="package_manager" value="conda (anaconda)"/>
      </operator>
      <operator activated="true" class="generate_attributes" compatibility="9.1.000" expanded="true" height="82" name="Generate Attributes" width="90" x="313" y="34">
        <list key="function_descriptions">
          <parameter key="Fecha" value="date_now()"/>
        </list>
        <parameter key="keep_all" value="true"/>
      </operator>
      <operator activated="true" class="date_to_nominal" compatibility="9.1.000" expanded="true" height="82" name="Date to Nominal" width="90" x="514" y="34">
        <parameter key="attribute_name" value="Fecha"/>
        <parameter key="date_format" value="yyyy/MM/dd hh:mm:ss"/>
        <parameter key="time_zone" value="SYSTEM"/>
        <parameter key="locale" value="English (United States)"/>
        <parameter key="keep_old_attribute" value="false"/>
      </operator>
      <connect from_op="Execute Python" from_port="output 1" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_op="Date to Nominal" to_port="example set input"/>
      <connect from_op="Date to Nominal" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>