0

Hi, I am running the query below in PySpark SQL but I am getting an error. Please help me find where I am missing a ')'.

Query -

-- Query exactly as submitted (kept verbatim so the position in the error below still lines up).
-- Visible slips: `groupby` (one word) in the final subquery, and `cte1.indicatio` in the first join condition.
`with cte1 as (select `Project Number`, indication,rank() over (partition by `Project Number`,REGEXP_REPLACE(indication,'[^a-zA-Z0-9]+', '') order by `Project Number`,indication) as rnk from (select distinct `Project Number`, indication from vw_onco_pharma onco_pharma union select distinct `Project Number`, indication from vw_onco_cell_gene cell_gene union select distinct `Project Number`, indication from vw_non_onco_cell_gene onco_cell_gene union select distinct `Project Number`, indication from vw_non_onco_pharma non_onco_pharma union select distinct `Project Number`, indication from vw_plasma_protein plasma_protein)),y as (select max(cast(project_id as integer)) as max_prj_id from vw_project_id) select nvl(max_prj_id,0)+ROW_NUMBER () OVER (ORDER BY `Project Number`,indication) as project_id,`Project Number`,indication,date_format(current_timestamp(),'yyyy-MM-dd hh:mm:ss') as HTA_INSERT_DT from (select cte1.`Project Number`, cte1.indication,max_prj_id from cte1 left join vw_project_id prj on cte1.`Project Number` = prj.`Project Number` and REGEXP_REPLACE(cte1.indicatio,'[^a-zA-Z0-9]+', '') = REGEXP_REPLACE(prj.indication,'[^a-zA-Z0-9]+', '') left join  y on 1 = 1 where rnk = 1 and prj.project_id is null and cte1.`project number` in (select `project number` from cte1 group by `project number` having count(*) > 1) union select cte1.`Project Number`, null as indication,max_prj_id from cte1 left join vw_project_id prj on cte1.`Project Number` = prj.`Project Number` left join y on 1 = 1 where rnk = 1 and prj.project_id is null and cte1.`project number` in (select `project number` from cte1 groupby `project number` having count(*) = 1))`

Error-

pyspark.sql.utils.ParseException:
missing ')' at 'in'(line 1, pos 1575)

1 Answer 1

1

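The last `groupby` should be `group by` (two words). Also note `cte1.indicatio` in the first join condition, which looks like a typo for `cte1.indication`.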

Also, try formatting your query so that it is readable:

-- Produces new project_id values for (Project Number, indication) rows from the
-- source views that have no matching row in vw_project_id.
--
-- Corrections relative to the query as posted:
--   * `groupby` is written as `group by`;
--   * `cte1.indicatio` is corrected to `cte1.indication`;
--   * the back-quoted column is written `Project Number` everywhere, with no
--     padding spaces inside the back-ticks (padding names a different column)
--     and with one consistent casing;
--   * derived tables are given aliases.

-- cte1: distinct (Project Number, indication) pairs across the five source
-- views, ranked within each project + indication with non-alphanumerics removed.
with cte1 as (
  select
    `Project Number`,
    indication,
    rank() over (
      partition by
        `Project Number`,
        regexp_replace(indication, '[^a-zA-Z0-9]+', '')
      order by
        `Project Number`,
        indication
    ) as rnk
  from (
    select distinct `Project Number`, indication from vw_onco_pharma onco_pharma
    union
    select distinct `Project Number`, indication from vw_onco_cell_gene cell_gene
    union
    select distinct `Project Number`, indication from vw_non_onco_cell_gene onco_cell_gene
    union
    select distinct `Project Number`, indication from vw_non_onco_pharma non_onco_pharma
    union
    select distinct `Project Number`, indication from vw_plasma_protein plasma_protein
  ) all_sources
),
-- y: single row holding the highest project_id currently in vw_project_id.
y as (
  select
    max(cast(project_id as integer)) as max_prj_id
  from vw_project_id
)
select
  -- New ids continue after the current maximum (0 when max_prj_id is null).
  nvl(max_prj_id, 0) + row_number() over (
    order by
      `Project Number`,
      indication
  ) as project_id,
  `Project Number`,
  indication,
  -- NOTE(review): 'hh' is the 12-hour clock field in Spark datetime patterns;
  -- if a 24-hour timestamp is intended this should be 'HH' -- confirm.
  date_format(current_timestamp(), 'yyyy-MM-dd hh:mm:ss') as HTA_INSERT_DT
from (
  -- Projects with more than one row in cte1: keep the indication and match
  -- existing ids on project + indication with non-alphanumerics removed.
  select
    cte1.`Project Number`,
    cte1.indication,
    max_prj_id
  from cte1
  left join vw_project_id prj
    on cte1.`Project Number` = prj.`Project Number`
    and regexp_replace(cte1.indication, '[^a-zA-Z0-9]+', '')
      = regexp_replace(prj.indication, '[^a-zA-Z0-9]+', '')
  left join y
    on 1 = 1
  where
    rnk = 1
    and prj.project_id is null  -- anti-join: no existing id for this row
    and cte1.`Project Number` in (
      select `Project Number`
      from cte1
      group by `Project Number`
      having count(*) > 1
    )

  union

  -- Projects with exactly one row in cte1: indication is emitted as null and
  -- existing ids are matched on project only.
  select
    cte1.`Project Number`,
    null as indication,
    max_prj_id
  from cte1
  left join vw_project_id prj
    on cte1.`Project Number` = prj.`Project Number`
  left join y
    on 1 = 1
  where
    rnk = 1
    and prj.project_id is null  -- anti-join: no existing id for this project
    and cte1.`Project Number` in (
      select `Project Number`
      from cte1
      group by `Project Number`
      having count(*) = 1
    )
) new_rows
Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.