Workflow saved

2025-08-28 11:00:26 +00:00
parent 4554a42d83
commit 0a844e08a1
3 changed files with 414 additions and 14 deletions
--- a/payment_metrics/main.py
+++ b/payment_metrics/main.py
@@ -71,12 +71,12 @@ bootstrap_udfs(spark)



-success_payments_df = spark.read.table('dremio.payments')
-success_payments_df.createOrReplaceTempView('success_payments_df')
+success_payments_reader_df = spark.read.table('dremio.payments')
+success_payments_reader_df.createOrReplaceTempView('success_payments_reader_df')

 # %%

-_success_payments_mapper_select_clause=success_payments_df.columns if False else []
+_success_payments_mapper_select_clause=success_payments_reader_df.columns if False else []

 _success_payments_mapper_select_clause.append("DATE(payment_date) AS payment_date")

@@ -87,7 +87,7 @@ _success_payments_mapper_select_clause.append("gateway AS gateway")
 _success_payments_mapper_select_clause.append("payment_method AS payment_method")


-success_payments_mapper_df=spark.sql(("SELECT " + ', '.join(_success_payments_mapper_select_clause) + " FROM success_payments_df").replace("{job_id}",f"'{job_id}'"))
+success_payments_mapper_df=spark.sql(("SELECT " + ', '.join(_success_payments_mapper_select_clause) + " FROM success_payments_reader_df").replace("{job_id}",f"'{job_id}'"))
 success_payments_mapper_df.createOrReplaceTempView("success_payments_mapper_df")

 # %%
@@ -273,3 +273,182 @@ high_valued_payments_df.createOrReplaceTempView('high_valued_payments_df')



+
+# %%
+
+# failed_payments_reader: load the dremio.failedpayments table and register it
+# as temp view 'failed_payments_reader_df' so it can be queried with spark.sql.
+failed_payments_reader_df = spark.read.table('dremio.failedpayments')
+failed_payments_reader_df.createOrReplaceTempView('failed_payments_reader_df')
+
+# %%
+# failed_payments_mapper: select DATE(payment_date), payment_method, failure_reason and gateway from the reader view.
+_failed_payments_mapper_select_clause=failed_payments_reader_df.columns if False else []
+# (`if False` never takes the .columns branch, so the clause list always starts empty.)
+_failed_payments_mapper_select_clause.append("DATE(payment_date) AS payment_date")
+
+_failed_payments_mapper_select_clause.append("payment_method AS payment_method")
+
+_failed_payments_mapper_select_clause.append("failure_reason AS failure_reason")
+
+_failed_payments_mapper_select_clause.append("gateway AS gateway")
+
+# None of the strings above contains "{job_id}", so the replace() below is a no-op for this cell.
+failed_payments_mapper_df=spark.sql(("SELECT " + ', '.join(_failed_payments_mapper_select_clause) + " FROM failed_payments_reader_df").replace("{job_id}",f"'{job_id}'"))
+failed_payments_mapper_df.createOrReplaceTempView("failed_payments_mapper_df")
+
+# %%
+# final_failed_payments: keep rows dated on/after the latest payment_date in dremio.failedpaymentmetrics; if that MAX is NULL, fall back to the earliest mapped payment_date.
+print(failed_payments_mapper_df.columns)
+final_failed_payments_df = spark.sql("select * from failed_payments_mapper_df where payment_date >= COALESCE((SELECT MAX(DATE(payment_date)) FROM dremio.failedpaymentmetrics), (SELECT MIN(payment_date) FROM failed_payments_mapper_df))")
+final_failed_payments_df.createOrReplaceTempView('final_failed_payments_df')
+
+# %%
+# filter__13: restrict final_failed_payments_df to rows whose gateway is 'CCS' and register the result as a temp view.
+print(final_failed_payments_df.columns)
+filter__13_df = spark.sql("select * from final_failed_payments_df where gateway = \'CCS\'")
+filter__13_df.createOrReplaceTempView('filter__13_df')
+
+# %%
+# total_failed_payments__: aggregate filter__13_df with COUNT(*) AS total_failed_payments,
+# passing group expression "payment_date", and register the result as temp view
+# 'total_failed_payments___df'.
+# NOTE(review): preprocess_then_expand is defined outside this cell; from its use here it
+# presumably returns (prepared DataFrame, list of grouping-column lists, rewritten select
+# functions) - confirm against its definition.
+
+
+# _params is not read again within this cell.
+_params = {
+    "datasource": "filter__13",
+    "selectFunctions" : [{'fieldName': 'total_failed_payments', 'aggregationFunction': 'COUNT(*)'}]
+}
+
+_df_flat, _grouping_specs, _rewritten_selects = preprocess_then_expand( filter__13_df,
+group_expression="payment_date",
+cube="",
+rollup="",
+grouping_set="",
+select_functions=[{'fieldName': 'total_failed_payments', 'aggregationFunction': 'COUNT(*)'}]
+)
+# Turn each rewritten select function into a Spark column expression aliased to its fieldName.
+_agg_exprs = [expr(f["aggregationFunction"]).alias(f["fieldName"])
+for f in _rewritten_selects
+]
+# Every column that appears in any grouping spec (built via a set, so the order is not fixed).
+_all_group_cols = list({c for gs in _grouping_specs for c in gs})
+# Aggregate once per grouping spec; columns missing from a spec are added as NULL literals.
+_partials = []
+for _gs in _grouping_specs:
+    _gdf = _df_flat.groupBy(*_gs).agg(*_agg_exprs)
+    for _col in _all_group_cols:
+        if _col not in _gs:
+            _gdf = _gdf.withColumn(_col, lit(None))
+    _partials.append(_gdf)
+
+# Stack the per-spec results, matching columns by name.
+total_failed_payments___df = reduce(lambda a, b: a.unionByName(b), _partials)
+
+total_failed_payments___df.createOrReplaceTempView('total_failed_payments___df')
+
+
+
+
+
+# %%
+# failed_payment_metrics: aggregate final_failed_payments_df with COUNT(*) AS failure_count,
+# passing group expression "payment_date, gateway, failure_reason", and register the
+# result as temp view 'failed_payment_metrics_df'.
+# NOTE(review): preprocess_then_expand is defined outside this cell; from its use here it
+# presumably returns (prepared DataFrame, list of grouping-column lists, rewritten select
+# functions) - confirm against its definition.
+
+
+# _params is not read again within this cell.
+_params = {
+    "datasource": "final_failed_payments",
+    "selectFunctions" : [{'fieldName': 'failure_count', 'aggregationFunction': 'COUNT(*)'}]
+}
+
+_df_flat, _grouping_specs, _rewritten_selects = preprocess_then_expand( final_failed_payments_df,
+group_expression="payment_date, gateway, failure_reason",
+cube="",
+rollup="",
+grouping_set="",
+select_functions=[{'fieldName': 'failure_count', 'aggregationFunction': 'COUNT(*)'}]
+)
+# Turn each rewritten select function into a Spark column expression aliased to its fieldName.
+_agg_exprs = [expr(f["aggregationFunction"]).alias(f["fieldName"])
+for f in _rewritten_selects
+]
+# Every column that appears in any grouping spec (built via a set, so the order is not fixed).
+_all_group_cols = list({c for gs in _grouping_specs for c in gs})
+# Aggregate once per grouping spec; columns missing from a spec are added as NULL literals.
+_partials = []
+for _gs in _grouping_specs:
+    _gdf = _df_flat.groupBy(*_gs).agg(*_agg_exprs)
+    for _col in _all_group_cols:
+        if _col not in _gs:
+            _gdf = _gdf.withColumn(_col, lit(None))
+    _partials.append(_gdf)
+
+# Stack the per-spec results, matching columns by name.
+failed_payment_metrics_df = reduce(lambda a, b: a.unionByName(b), _partials)
+
+failed_payment_metrics_df.createOrReplaceTempView('failed_payment_metrics_df')
+
+
+
+
+
+# %%
+# data_writer__15: upsert failed_payment_metrics_df into dremio.failedpaymentmetrics.
+# Rows are matched on the unique key columns; matched rows have only their non-key
+# columns updated, and unmatched source rows are inserted as-is.
+_data_writer__15_unique_keys = ['payment_date', 'gateway', 'failure_reason']
+_data_writer__15_fields_to_update = failed_payment_metrics_df.columns
+_data_writer__15_set_clause=[]
+_data_writer__15_unique_key_clause= []
+
+for _key in _data_writer__15_unique_keys:
+    _data_writer__15_unique_key_clause.append(f't.{_key} = s.{_key}')
+
+for _field in _data_writer__15_fields_to_update:
+    # Test against the key names, not the rendered "t.k = s.k" conditions, so key columns stay out of UPDATE SET.
+    if(_field not in _data_writer__15_unique_keys):
+        _data_writer__15_set_clause.append(f't.{_field} = s.{_field}')
+
+_merge_query = '''
+    MERGE INTO dremio.failedpaymentmetrics t
+    USING failed_payment_metrics_df s
+    ON ''' + ' AND '.join(_data_writer__15_unique_key_clause) + ''' WHEN MATCHED THEN
+      UPDATE SET ''' +  ', '.join(_data_writer__15_set_clause) + ' WHEN NOT MATCHED THEN INSERT *'
+
+spark.sql(_merge_query)
+
+
+# %%
+# success_payment_metrics: join the payment metric views with the failed-payment totals on payment_date.
+print(total_payments_and_total_value_processed_df.columns)
+print(most_used_payment_method___df.columns)
+print(high_valued_payments_df.columns)
+print(total_failed_payments___df.columns)
+# NOTE(review): b and c are joined on a.payment_date, so for dates present only in d (a side NULL) their columns stay NULL.
+success_payment_metrics_df = spark.sql("""
+SELECT 
+  COALESCE(a.payment_date, d.payment_date) AS payment_date,
+  a.total_payments,
+  a.total_value_processed,
+  b.most_used_payment_method,
+  c.high_valued_payments,
+  d.total_failed_payments
+FROM total_failed_payments___df d
+FULL OUTER JOIN total_payments_and_total_value_processed_df a 
+  ON a.payment_date = d.payment_date
+LEFT JOIN most_used_payment_method___df b 
+  ON a.payment_date = b.payment_date
+LEFT JOIN high_valued_payments_df c 
+  ON a.payment_date = c.payment_date
+""")
+
+success_payment_metrics_df.createOrReplaceTempView('success_payment_metrics_df')
--- a/payment_metrics/main.py.notebook
+++ b/payment_metrics/main.py.notebook
@@ -74,19 +74,19 @@ def init():


@app.cell
-def success_payments(spark):
+def success_payments_reader(spark):



-    success_payments_df = spark.read.table('dremio.payments')
-    success_payments_df.createOrReplaceTempView('success_payments_df')
-    return (success_payments_df,)
+    success_payments_reader_df = spark.read.table('dremio.payments')
+    success_payments_reader_df.createOrReplaceTempView('success_payments_reader_df')
+    return (success_payments_reader_df,)


@app.cell
-def success_payments_mapper(job_id, spark, success_payments_df):
+def success_payments_mapper(job_id, spark, success_payments_reader_df):

-    _success_payments_mapper_select_clause=success_payments_df.columns if False else []
+    _success_payments_mapper_select_clause=success_payments_reader_df.columns if False else []

    _success_payments_mapper_select_clause.append("DATE(payment_date) AS payment_date")

@@ -97,7 +97,7 @@ def success_payments_mapper(job_id, spark, success_payments_df):
    _success_payments_mapper_select_clause.append("payment_method AS payment_method")


-    success_payments_mapper_df=spark.sql(("SELECT " + ', '.join(_success_payments_mapper_select_clause) + " FROM success_payments_df").replace("{job_id}",f"'{job_id}'"))
+    success_payments_mapper_df=spark.sql(("SELECT " + ', '.join(_success_payments_mapper_select_clause) + " FROM success_payments_reader_df").replace("{job_id}",f"'{job_id}'"))
    success_payments_mapper_df.createOrReplaceTempView("success_payments_mapper_df")
    return (success_payments_mapper_df,)

@@ -172,7 +172,7 @@ def total_payments_and_total_value_processed(



-    return
+    return (total_payments_and_total_value_processed_df,)


@app.cell
@@ -270,7 +270,7 @@ def most_used_payment_method__(filter__6_df, job_id, spark):

    most_used_payment_method___df=spark.sql(("SELECT " + ', '.join(_most_used_payment_method___select_clause) + " FROM filter__6_df").replace("{job_id}",f"'{job_id}'"))
    most_used_payment_method___df.createOrReplaceTempView("most_used_payment_method___df")
-    return
+    return (most_used_payment_method___df,)


@app.cell
@@ -325,6 +325,227 @@ def high_valued_payments(



+    return (high_valued_payments_df,)
+
+
+@app.cell
+def failed_payments_reader(spark):
+
+    # Load the dremio.failedpayments table and register it as temp view
+    # 'failed_payments_reader_df' so it can be queried with spark.sql.
+    failed_payments_reader_df = spark.read.table('dremio.failedpayments')
+    failed_payments_reader_df.createOrReplaceTempView('failed_payments_reader_df')
+    return (failed_payments_reader_df,)
+
+
+@app.cell
+def failed_payments_mapper(failed_payments_reader_df, job_id, spark):
+    # Select DATE(payment_date), payment_method, failure_reason and gateway from the reader view.
+    _failed_payments_mapper_select_clause=failed_payments_reader_df.columns if False else []
+    # (`if False` never takes the .columns branch, so the clause list always starts empty.)
+    _failed_payments_mapper_select_clause.append("DATE(payment_date) AS payment_date")
+
+    _failed_payments_mapper_select_clause.append("payment_method AS payment_method")
+
+    _failed_payments_mapper_select_clause.append("failure_reason AS failure_reason")
+
+    _failed_payments_mapper_select_clause.append("gateway AS gateway")
+
+    # None of the strings above contains "{job_id}", so the replace() below is a no-op for this cell.
+    failed_payments_mapper_df=spark.sql(("SELECT " + ', '.join(_failed_payments_mapper_select_clause) + " FROM failed_payments_reader_df").replace("{job_id}",f"'{job_id}'"))
+    failed_payments_mapper_df.createOrReplaceTempView("failed_payments_mapper_df")
+    return (failed_payments_mapper_df,)
+
+
+@app.cell
+def final_failed_payments(failed_payments_mapper_df, spark):
+    # Keep rows dated on/after the latest payment_date in dremio.failedpaymentmetrics; if that MAX is NULL, fall back to the earliest mapped payment_date.
+    print(failed_payments_mapper_df.columns)
+    final_failed_payments_df = spark.sql("select * from failed_payments_mapper_df where payment_date >= COALESCE((SELECT MAX(DATE(payment_date)) FROM dremio.failedpaymentmetrics), (SELECT MIN(payment_date) FROM failed_payments_mapper_df))")
+    final_failed_payments_df.createOrReplaceTempView('final_failed_payments_df')
+    return (final_failed_payments_df,)
+
+
+@app.cell
+def filter__13(final_failed_payments_df, spark):
+    # Restrict final_failed_payments_df to rows whose gateway is 'CCS' and register the result as a temp view.
+    print(final_failed_payments_df.columns)
+    filter__13_df = spark.sql("select * from final_failed_payments_df where gateway = \'CCS\'")
+    filter__13_df.createOrReplaceTempView('filter__13_df')
+    return (filter__13_df,)
+
+
+@app.cell
+def total_failed_payments__(
+    expr,
+    filter__13_df,
+    lit,
+    preprocess_then_expand,
+    reduce,
+):
+    # Aggregate filter__13_df with COUNT(*) AS total_failed_payments, passing group
+    # expression "payment_date", and register the result as temp view
+    # 'total_failed_payments___df'.
+    # NOTE(review): preprocess_then_expand is supplied by another cell; from its use here
+    # it presumably returns (prepared DataFrame, list of grouping-column lists, rewritten
+    # select functions) - confirm against its definition.
+
+
+    # _params is not read again within this cell.
+    _params = {
+        "datasource": "filter__13",
+        "selectFunctions" : [{'fieldName': 'total_failed_payments', 'aggregationFunction': 'COUNT(*)'}]
+    }
+
+    _df_flat, _grouping_specs, _rewritten_selects = preprocess_then_expand( filter__13_df,
+    group_expression="payment_date",
+    cube="",
+    rollup="",
+    grouping_set="",
+    select_functions=[{'fieldName': 'total_failed_payments', 'aggregationFunction': 'COUNT(*)'}]
+    )
+    # Turn each rewritten select function into a Spark column expression aliased to its fieldName.
+    _agg_exprs = [expr(f["aggregationFunction"]).alias(f["fieldName"])
+    for f in _rewritten_selects
+    ]
+    # Every column that appears in any grouping spec (built via a set, so the order is not fixed).
+    _all_group_cols = list({c for gs in _grouping_specs for c in gs})
+    # Aggregate once per grouping spec; columns missing from a spec are added as NULL literals.
+    _partials = []
+    for _gs in _grouping_specs:
+        _gdf = _df_flat.groupBy(*_gs).agg(*_agg_exprs)
+        for _col in _all_group_cols:
+            if _col not in _gs:
+                _gdf = _gdf.withColumn(_col, lit(None))
+        _partials.append(_gdf)
+
+    # Stack the per-spec results, matching columns by name.
+    total_failed_payments___df = reduce(lambda a, b: a.unionByName(b), _partials)
+
+    total_failed_payments___df.createOrReplaceTempView('total_failed_payments___df')
+
+
+
+
+    return (total_failed_payments___df,)
+
+
+@app.cell
+def failed_payment_metrics(
+    expr,
+    final_failed_payments_df,
+    lit,
+    preprocess_then_expand,
+    reduce,
+):
+    # Aggregate final_failed_payments_df with COUNT(*) AS failure_count, passing group
+    # expression "payment_date, gateway, failure_reason", and register the result as
+    # temp view 'failed_payment_metrics_df'.
+    # NOTE(review): preprocess_then_expand is supplied by another cell; from its use here
+    # it presumably returns (prepared DataFrame, list of grouping-column lists, rewritten
+    # select functions) - confirm against its definition.
+
+
+    # _params is not read again within this cell.
+    _params = {
+        "datasource": "final_failed_payments",
+        "selectFunctions" : [{'fieldName': 'failure_count', 'aggregationFunction': 'COUNT(*)'}]
+    }
+
+    _df_flat, _grouping_specs, _rewritten_selects = preprocess_then_expand( final_failed_payments_df,
+    group_expression="payment_date, gateway, failure_reason",
+    cube="",
+    rollup="",
+    grouping_set="",
+    select_functions=[{'fieldName': 'failure_count', 'aggregationFunction': 'COUNT(*)'}]
+    )
+    # Turn each rewritten select function into a Spark column expression aliased to its fieldName.
+    _agg_exprs = [expr(f["aggregationFunction"]).alias(f["fieldName"])
+    for f in _rewritten_selects
+    ]
+    # Every column that appears in any grouping spec (built via a set, so the order is not fixed).
+    _all_group_cols = list({c for gs in _grouping_specs for c in gs})
+    # Aggregate once per grouping spec; columns missing from a spec are added as NULL literals.
+    _partials = []
+    for _gs in _grouping_specs:
+        _gdf = _df_flat.groupBy(*_gs).agg(*_agg_exprs)
+        for _col in _all_group_cols:
+            if _col not in _gs:
+                _gdf = _gdf.withColumn(_col, lit(None))
+        _partials.append(_gdf)
+
+    # Stack the per-spec results, matching columns by name.
+    failed_payment_metrics_df = reduce(lambda a, b: a.unionByName(b), _partials)
+
+    failed_payment_metrics_df.createOrReplaceTempView('failed_payment_metrics_df')
+
+
+
+
+    return (failed_payment_metrics_df,)
+
+
+@app.cell
+def data_writer__15(failed_payment_metrics_df, spark):
+    # Upsert failed_payment_metrics_df into dremio.failedpaymentmetrics: rows are
+    # matched on the unique key columns, matched rows have only their non-key
+    # columns updated, and unmatched source rows are inserted as-is.
+    _data_writer__15_unique_keys = ['payment_date', 'gateway', 'failure_reason']
+    _data_writer__15_fields_to_update = failed_payment_metrics_df.columns
+    _data_writer__15_set_clause=[]
+    _data_writer__15_unique_key_clause= []
+
+    for _key in _data_writer__15_unique_keys:
+        _data_writer__15_unique_key_clause.append(f't.{_key} = s.{_key}')
+
+    for _field in _data_writer__15_fields_to_update:
+        # Test against the key names, not the rendered "t.k = s.k" conditions, so key columns stay out of UPDATE SET.
+        if(_field not in _data_writer__15_unique_keys):
+            _data_writer__15_set_clause.append(f't.{_field} = s.{_field}')
+
+    _merge_query = '''
+        MERGE INTO dremio.failedpaymentmetrics t
+        USING failed_payment_metrics_df s
+        ON ''' + ' AND '.join(_data_writer__15_unique_key_clause) + ''' WHEN MATCHED THEN
+          UPDATE SET ''' +  ', '.join(_data_writer__15_set_clause) + ' WHEN NOT MATCHED THEN INSERT *'
+
+    spark.sql(_merge_query)
+
+    return
+
+
+@app.cell
+def success_payment_metrics(
+    high_valued_payments_df,
+    most_used_payment_method___df,
+    spark,
+    total_failed_payments___df,
+    total_payments_and_total_value_processed_df,
+):
+    # Join the payment metric views with the failed-payment totals on payment_date.
+    print(total_payments_and_total_value_processed_df.columns)
+    print(most_used_payment_method___df.columns)
+    print(high_valued_payments_df.columns)
+    print(total_failed_payments___df.columns)
+    # NOTE(review): b and c are joined on a.payment_date, so for dates present only in d (a side NULL) their columns stay NULL.
+    success_payment_metrics_df = spark.sql("""
+    SELECT 
+      COALESCE(a.payment_date, d.payment_date) AS payment_date,
+      a.total_payments,
+      a.total_value_processed,
+      b.most_used_payment_method,
+      c.high_valued_payments,
+      d.total_failed_payments
+    FROM total_failed_payments___df d
+    FULL OUTER JOIN total_payments_and_total_value_processed_df a 
+      ON a.payment_date = d.payment_date
+    LEFT JOIN most_used_payment_method___df b 
+      ON a.payment_date = b.payment_date
+    LEFT JOIN high_valued_payments_df c 
+      ON a.payment_date = c.payment_date
+    """)
+
+    success_payment_metrics_df.createOrReplaceTempView('success_payment_metrics_df')
    return


--- a/payment_metrics/main.workflow
+++ b/payment_metrics/main.workflow