generated from nogibjj/Eric_Ortega_Rodriguez_Mini_Project_5
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
110 lines (88 loc) · 2.75 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
from dotenv import load_dotenv
from mylib.extract import extract
from mylib.transform_load import load
from mylib.query import general_query
def main():
    """Run the pipeline stages in order: extract, load, then query.

    Calls ``load_dotenv()`` first, then ``extract()``, ``load()`` and
    ``general_query()`` one after another, printing a status line before
    each stage. Nothing is returned.
    """
    # Aggregation over Avengers joined to Battles: per `Name/Alias`, total
    # appearances and battle count, keeping only groups whose summed
    # appearances exceed 100, ordered by battles and then appearances.
    # NOTE(review): the join repeats each Avengers row once per matching
    # Battles row, so SUM(a.Appearances) adds up those repeated rows --
    # confirm that this is the intended figure.
    avengers_battles_sql = '''
SELECT
a.`Name/Alias`,
SUM(a.Appearances) AS total_appearances,
COUNT(b.battle_id) AS total_battles
FROM
Avengers a
JOIN
Battles b ON a.avenger_id = b.avenger_id
GROUP BY
a.`Name/Alias`
HAVING
SUM(a.Appearances) > 100
ORDER BY
total_battles DESC, total_appearances DESC
'''

    # Environment variables are loaded before any stage runs.
    load_dotenv()

    print("Extracting data....")
    extract()

    print("Transforming data...")
    load()

    print("Querying data from Databricks...")
    general_query(avengers_battles_sql)
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
# import os
# from dotenv import load_dotenv
# from databricks import sql
# # Import your functions
# from mylib.extract import extract
# from mylib.query import general_query
# from mylib.transform_load import load
# def main():
# # Load environment variables
# load_dotenv()
# # Extract data
# print("Extracting data....")
# extract()
# # Transform and Load data
# print("Transforming data...")
# load()
# # Query data using Databricks connection
# print("Querying data from Databricks...")
# # Define a complex SQL query with joins, aggregation, and sorting
# query = '''
# SELECT
# a."Name/Alias",
# SUM(a.Appearances) AS total_appearances,
# COUNT(b.battle_id) AS total_battles
# FROM
# Avengers a
# JOIN
# Battles b ON a.avenger_id = b.avenger_id
# GROUP BY
# a."Name/Alias"
# HAVING
# SUM(a.Appearances) > 100 -- Only Avengers with more than 100 appearances
# ORDER BY
# total_battles DESC, total_appearances DESC
# '''
# # Run the query and log the results
# general_query(query)
# if __name__ == "__main__":
# main()
# # # Paths and parameters for the new dataset
# # dataset_path = "data/avengers.csv"
# # db_name = "avengers.db"
# # table_name = "Avengers"
# # # Extract
# # print("Extracting data from the database...")
# # # Extract data directly from the database
# # data = extract(database=db_name, table=table_name)
# # # Transform and load
# # print("Transforming and loading data...")
# # # Load the CSV data into the database if necessary
# # load(dataset=dataset_path, db_name=db_name, table_name=table_name)
# # # Query
# # print("Querying data...")
# # results = query(database=db_name, table=table_name)
# # # Print query results
# # print("Top 5 rows from the Avengers table:")
# # for row in results:
# # print(row)