Apache Spark java.lang.AssertionError: assertion failed

zi8p0yeb posted on 2023-10-23 in Apache

I am running the simple query below on Spark version 2.4.0-cdh6.3.3 and getting the error below while inserting the data into a table. The same query runs without error on version 2.2.0.cloudera2.
The query is as follows:

spark.sql("""Select Id , OwnerId owner_id, IsDeleted is_deleted, Name name, RecordTypeId record_type_id, CreatedDate create_date, CreatedById  created_by_id, LastModifiedDate  last_modified_date, LastModifiedById  last_modified_by_id, SystemModstamp  system_mod_stamp, cast(MayEdit as string)  may_edit, cast(IsLocked as string)  is_locked, Account_Id_vod__c , Account_Name_vod__c , Call_Name_vod__c , Comments_vod__c , Confirmed_Quantity_vod__c , Distributor_vod__c , Quantity_vod__c , Sample_U_M__c , Sample_vod__c , Ship_Address_Line_1_vod__c  ship_addr_line_1_vod__c, Ship_Address_Line_2_vod__c  ship_addr_line_2_vod__c, Ship_City_vod__c , Ship_Country_vod__c , Ship_License_Expiration_Date_vod__c   ship_license_exp_date_vod__c, Ship_License_Status_vod__c , Ship_License_vod__c , Ship_State_vod__c , Ship_Zip_vod__c , Shipment_Id_vod__c , Signature_Date_vod__c , Status_vod__c , Territory_vod__c , cast(Unlock_vod__c as string) , Call_Id_vod__c , Call_Sample_Id_vod__c , Call_Sample_Name_vod__c  call_sample_name_vod__c, ASSMCA_vod__c  assmca_vod__c, Account_vod__c , Call_Date_vod__c , Call_Datetime_vod__c , DEA_Expiration_Date_vod__c , DEA_vod__c, cast(Request_Receipt_vod__c as string), Sample_Card_Reason_vod__c , Sample_Send_Card_vod__c , Ship_Zip_4_vod__c ,  credentials_vod__c, Manufacturer_vod__c , Salutation_vod__c , Delivery_Status_vod__c , shipping_address__c  shipping_addr__c,   Sample_Request__c ,  Product_Id__c  Product_Id__c, 263669 batch_id, current_timestamp() rec_insert_date, 'vod' rec_insert_by, '' reject_reason, 'Y' status_flag from ph_com_p_usa_veeva.Sample_Order_Transaction_vod__c where   Item_status__c in ('Processing') and ingested_time > '2020-08-08 13:15:05' union all select id, owner_id, cast(is_deleted as boolean), name, record_type_id, create_date, created_by_id, last_modified_date, last_modified_by_id, system_mod_stamp, may_edit, is_locked, account_id_vod__c, account_name_vod__c, call_name_vod__c, comments_vod__c, confirmed_quantity_vod__c, distributor_vod__c, quantity_vod__c, sample_u_m__c, sample_vod__c, ship_addr_line_1_vod__c, ship_addr_line_2_vod__c, ship_city_vod__c, ship_country_vod__c, ship_license_exp_date_vod__c, ship_license_status_vod__c, ship_license_vod__c, ship_state_vod__c, ship_zip_vod__c, shipment_id_vod__c, signature_date_vod__c, status_vod__c, territory_vod__c, unlock_vod__c, call_id_vod__c, call_sample_id_vod__c, call_sample_name_vod__c, assmca_vod__c, account_vod__c, call_date_vod__c, call_datetime_vod__c,dea_expiration_date_vod__c,  dea_vod__c,  request_receipt_vod__c,  sample_card_reason_vod__c,  sample_send_card_vod__c,  ship_zip_4_vod__c,  credentials_vod__c,  manufacturer_vod__c,  salutation_vod__c,  delivery_status_vod__c,  shipping_addr__c,  sample_request__c,  product_id__c,263669  batch_id,CURRENT_TIMESTAMP()  rec_insert_date,  rec_insert_by,  reject_reason,'Y'  status_flag from all_all_r_usa_vntg.stg_vod_smpl_req_trsn stg_vod_smpl_req_trsn left anti join (select * from ph_com_p_usa_veeva.Sample_Order_Transaction_vod__c where   Item_status__c in ('Processing') and ingested_time > '2020-08-08 13:15:05'  ) b on (stg_vod_smpl_req_trsn.id=b.id) where status_flag!='Y' and rec_insert_date>'2020-08-08 13:15:05'""").createTempView("udldata")

spark.sql("""select Id,  owner_id,  is_deleted,  name,  record_type_id,  create_date,  created_by_id,  last_modified_date,  last_modified_by_id,  system_mod_stamp,  may_edit,  is_locked,  Account_Id_vod__c,  Account_Name_vod__c,  Call_Name_vod__c,  Comments_vod__c,  Confirmed_Quantity_vod__c,  Distributor_vod__c,  Quantity_vod__c,  Sample_U_M__c,  Sample_vod__c,  ship_addr_line_1_vod__c,  ship_addr_line_2_vod__c,  Ship_City_vod__c,  Ship_Country_vod__c,  ship_license_exp_date_vod__c,  Ship_License_Status_vod__c,  Ship_License_vod__c,  Ship_State_vod__c,  Ship_Zip_vod__c,  Shipment_Id_vod__c,  Signature_Date_vod__c,  Status_vod__c,  Territory_vod__c,  Unlock_vod__c,  Call_Id_vod__c,  Call_Sample_Id_vod__c,  call_sample_name_vod__c,  assmca_vod__c,  Account_vod__c,  Call_Date_vod__c,  Call_Datetime_vod__c,  DEA_Expiration_Date_vod__c,  DEA_vod__c,  Request_Receipt_vod__c,  Sample_Card_Reason_vod__c,  Sample_Send_Card_vod__c,  Ship_Zip_4_vod__c,  credentials_vod__c,  Manufacturer_vod__c,  Salutation_vod__c,  Delivery_Status_vod__c,  shipping_addr__c,  Sample_Request__c,  Product_Id__c,  batch_id,  rec_insert_date,  rec_insert_by,  'Customer not present in VDW' reject_reason,  'R' status_flag FROM udldata ul  WHERE ul.status_flag = 'Y'    AND NOT EXISTS (SELECT 1 FROM all_all_b_usa_crmods.ODS_CONTACTS oc   WHERE oc.sf_id = ul.ACCOUNT_ID_VOD__C  AND UPPER(oc.inactive) = 'FALSE')""").createTempView("rejected")

spark.sql("""select * from udldata ul left anti join rejected rj on ul.id = rj.id""").createTempView("valid")

spark.sql("""insert into ALL_ALL_R_USA_VNTG.STG_VOD_SMPL_REQ_TRSN select * from valid union select * from rejected""")

Error:

java.lang.AssertionError: assertion failed
  at scala.Predef$.assert(Predef.scala:156)
  at org.apache.spark.sql.catalyst.optimizer.PushDownPredicate$$anonfun$apply$19$$anonfun$39.apply(Optimizer.scala:1038)
  at org.apache.spark.sql.catalyst.optimizer.PushDownPredicate$$anonfun$apply$19$$anonfun$39.apply(Optimizer.scala:1033)
  at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
  at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
  at scala.collection.immutable.List.foreach(List.scala:392)
  at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
  at scala.collection.immutable.List.map(List.scala:296)
  at org.apache.spark.sql.catalyst.optimizer.PushDownPredicate$$anonfun$apply$19.applyOrElse(Optimizer.scala:1033)
  at org.apache.spark.sql.catalyst.optimizer.PushDownPredicate$$anonfun$apply$19.applyOrElse(Optimizer.scala:949)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$2.apply(TreeNode.scala:256)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$2.apply(TreeNode.scala:256)
  at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:255)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDown(LogicalPlan.scala:29)
  at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.transformDown(AnalysisHelper.scala:149)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:326)
  at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
  at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:324)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:261)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDown(LogicalPlan.scala:29)
  at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.transformDown(AnalysisHelper.scala:149)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:326)
  at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
  at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:324)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:261)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDown(LogicalPlan.scala:29)
  at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.transformDown(AnalysisHelper.scala:149)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:326)
  at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
  at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:324)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:261)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDown(LogicalPlan.scala:29)
  at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.transformDown(AnalysisHelper.scala:149)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:326)
  at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
  at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:324)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:261)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDown(LogicalPlan.scala:29)
  at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.transformDown(AnalysisHelper.scala:149)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261)
  at org.apache.spark.sql.catalyst.trees.TreeNode.org$apache$spark$sql$catalyst$trees$TreeNode$$mapChild$2(TreeNode.scala:295)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4$$anonfun$apply$13.apply(TreeNode.scala:354)
  at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
  at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
  at scala.collection.immutable.List.foreach(List.scala:392)
  at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
  at scala.collection.immutable.List.map(List.scala:296)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:354)
  at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
  at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:324)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:261)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDown(LogicalPlan.scala:29)
  at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.transformDown(AnalysisHelper.scala:149)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:326)
  at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
  at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:324)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:261)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDown(LogicalPlan.scala:29)
  at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.transformDown(AnalysisHelper.scala:149)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:326)
  at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
  at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:324)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:261)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDown(LogicalPlan.scala:29)
  at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.transformDown(AnalysisHelper.scala:149)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:326)
  at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
  at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:324)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:261)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDown(LogicalPlan.scala:29)
  at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.transformDown(AnalysisHelper.scala:149)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:245)
  at org.apache.spark.sql.catalyst.optimizer.PushDownPredicate$.apply(Optimizer.scala:949)
  at org.apache.spark.sql.catalyst.optimizer.PushDownPredicate$.apply(Optimizer.scala:948)
  at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:87)
  at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:84)
  at scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:124)
  at scala.collection.immutable.List.foldLeft(List.scala:84)
  at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:84)
  at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:76)
  at scala.collection.immutable.List.foreach(List.scala:392)
  at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:76)
  at org.apache.spark.sql.execution.QueryExecution.optimizedPlan$lzycompute(QueryExecution.scala:66)
  at org.apache.spark.sql.execution.QueryExecution.optimizedPlan(QueryExecution.scala:66)
  at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:72)
  at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:68)
  at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:77)
  at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:77)
  at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3359)
  at org.apache.spark.sql.Dataset.<init>(Dataset.scala:194)
  at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:79)
  at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:651)
  ... 49 elided

wlp8pajw1#

Resolved by setting the following parameter; set it before inserting the data into the table:

spark.sql("SET spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.PushDownPredicate")

spark.conf.getOption("spark.sql.optimizer.excludedRules")
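
Note that excluding PushDownPredicate this way turns that optimizer rule (predicate pushdown) off for the entire session, so other queries in the same session may scan more data than before; the second statement above just reads the option back to confirm it took effect. If you would rather set it once at startup, the same property can be passed when the SparkSession is built, or via --conf on spark-submit. A sketch (the app name is made up for illustration, not part of the original answer):

val spark = org.apache.spark.sql.SparkSession.builder()
  .appName("stg_vod_smpl_req_trsn_load")  // hypothetical name for illustration
  .config("spark.sql.optimizer.excludedRules",
          "org.apache.spark.sql.catalyst.optimizer.PushDownPredicate")
  .enableHiveSupport()
  .getOrCreate()

// Confirm the rule is excluded, as in the answer above
println(spark.conf.getOption("spark.sql.optimizer.excludedRules"))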
