-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
203 lines (164 loc) · 6.15 KB
/
app.py
File metadata and controls
203 lines (164 loc) · 6.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
import logging
import argparse
import signal
import sys
import time
from datetime import datetime, timedelta
from dotenv import load_dotenv
load_dotenv()
from flask import Flask, request, g
from flask_cors import CORS
from flask_jwt_extended import JWTManager
from flask_graphql import GraphQLView
from graphene import Schema
from src.schema import Query, Mutation
from src.scrapers.games_scraper import fetch_game_schedule
from src.scrapers.youtube_stats import fetch_videos
from src.scrapers.daily_sun_scrape import fetch_news
from src.services.article_service import ArticleService
from src.utils.constants import JWT_SECRET_KEY
from src.utils.team_loader import TeamLoader
from src.database import db
app = Flask(__name__)
# CORS: allow frontend (different origin) to call this API; credentials enabled
# so cookies/Authorization headers survive cross-origin requests.
CORS(app, supports_credentials=True)
# JWT config: short-lived access tokens (1h) with month-long refresh tokens.
app.config["JWT_SECRET_KEY"] = JWT_SECRET_KEY
app.config["JWT_ACCESS_TOKEN_EXPIRES"] = timedelta(hours=1)
app.config["JWT_REFRESH_TOKEN_EXPIRES"] = timedelta(days=30)
jwt = JWTManager(app)
@jwt.token_in_blocklist_loader
def check_if_token_revoked(jwt_header, jwt_payload: dict) -> bool:
    """Return True when this token's jti appears in the blocklist (revoked, e.g. via logout)."""
    revoked_entry = db["token_blocklist"].find_one({"jti": jwt_payload["jti"]})
    return revoked_entry is not None
@app.before_request
def start_timer():
    """Record the request start time on `g` and log the incoming request.

    For GraphQL POSTs, additionally tries to extract the operation name from
    the request body for more useful log lines; this is best-effort and must
    never block or fail the request.
    """
    g.start = time.time()
    if request.path == "/graphql" and request.method == "POST":
        try:
            # silent=True returns None (instead of raising) on a malformed
            # or non-JSON body, so bad clients can't break the logger.
            query_data = request.get_json(silent=True)
            if query_data and "query" in query_data:
                # Everything before the first "{" is the operation header,
                # e.g. "query GetTeams" — good enough for log identification.
                g.query = query_data["query"].split("{", 1)[0].strip()
                logging.info(
                    f"[{time.strftime('%H:%M:%S')}] --> GraphQL {g.query} started"
                )
        except Exception:
            # was a bare `except:` — that also swallowed SystemExit and
            # KeyboardInterrupt; logging stays best-effort, but only for
            # ordinary exceptions now.
            pass
    logging.info(
        f"[{time.strftime('%H:%M:%S')}] --> {request.method} {request.path} started"
    )
@app.after_request
def log_response_time(response):
    """Log request duration, warning about anything slower than 5 seconds."""
    if hasattr(g, "start"):
        duration = time.time() - g.start
        stamp = time.strftime('%H:%M:%S')
        # Prefer the GraphQL operation name captured in before_request.
        if hasattr(g, "query"):
            target = f"GraphQL {g.query}"
        else:
            target = f"{request.method} {request.path}"
        if duration > 5.0:  # Flag slow requests
            logging.warning(f"[{stamp}] <-- SLOW {target} ({duration:.2f}s)")
        else:
            logging.info(f"[{stamp}] <-- {target} finished in {duration:.2f}s")
    return response
# Configure logging
logging.basicConfig(
    format="%(asctime)s %(levelname)-8s %(message)s",
    level=logging.INFO,
    datefmt="%Y-%m-%d %H:%M:%S",
)
# GraphQL schema; auto_camelcase exposes snake_case resolver fields as camelCase.
schema = Schema(query=Query, mutation=Mutation, auto_camelcase=True)
def create_context():
    """Build a fresh GraphQL execution context holding a new TeamLoader."""
    context = {"team_loader": TeamLoader()}
    return context
# Mount GraphQL at /graphql with the in-browser GraphiQL IDE enabled;
# each request gets a fresh context from create_context.
app.add_url_rule(
    "/graphql",
    view_func=GraphQLView.as_view(
        "graphql", schema=schema, graphiql=True, get_context=create_context
    ),
)
# Setup command line arguments
def parse_args():
    """Parse developer-convenience CLI flags for disabling scraping tasks."""
    arg_parser = argparse.ArgumentParser(
        description="Skip scraping tasks, for dev purposes."
    )
    flag_specs = [
        ("--no-scrape", "Skips scraping tasks if set, useful for frontend development."),
        ("--no-daily-sun", "Skips using the Daily Sun page for alerts"),
    ]
    for flag, help_text in flag_specs:
        arg_parser.add_argument(flag, action="store_true", help=help_text)
    return arg_parser.parse_args()
# Only parse arguments when running directly (not when imported by gunicorn).
# NOTE: this block previously appeared twice verbatim, re-running parse_args()
# and re-defining DefaultArgs a second time for no effect — collapsed to one copy.
if __name__ == "__main__":
    args = parse_args()
else:
    # Default args when imported by gunicorn: run all scraping tasks.
    class DefaultArgs:
        no_scrape = False
        no_daily_sun = False
    args = DefaultArgs()

def signal_handler(sig, frame):
    """Exit cleanly on SIGINT/SIGTERM instead of dying with a traceback."""
    sys.exit(0)

signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, signal_handler)
# Only run scraping tasks if not disabled
if not args.no_scrape:
    # Imported here so the scheduler dependency is only needed when scraping runs.
    from flask_apscheduler import APScheduler
    scheduler = APScheduler()
    scheduler.init_app(app)
    scheduler.start()
    @scheduler.task("interval", id="cleanse_token_blocklist", seconds=86400)  # 24 hours
    def cleanse_token_blocklist():
        """Remove expired tokens from blocklist so the collection doesn't grow forever."""
        from datetime import timezone
        from src.database import db
        # Tokens past their expiry can never validate again, so they are safe to drop.
        result = db["token_blocklist"].delete_many(
            {"expires_at": {"$lt": datetime.now(timezone.utc)}}
        )
        if result.deleted_count:
            logging.info(f"Cleansed {result.deleted_count} expired token(s) from blocklist")
    @scheduler.task("interval", id="scrape_schedules", seconds=43200)  # 12 hours
    def scrape_schedules():
        """Refresh game schedules via the games scraper."""
        logging.info("Scraping game schedules...")
        fetch_game_schedule()
    @scheduler.task("interval", id="scrape_videos", seconds=43200)  # 12 hours
    def scrape_videos():
        """Refresh YouTube video data via the YouTube stats scraper."""
        logging.info("Scraping YouTube videos...")
        fetch_videos()
    # Run both scrapers once at startup so data is fresh before the first interval fires.
    scrape_schedules()
    scrape_videos()
# Daily Sun tasks also require scraping to be enabled, since they use `scheduler`.
if not args.no_daily_sun and not args.no_scrape:
    @scheduler.task("interval", id="scrape_daily_sun", seconds=3600)  # hourly
    def scrape_daily_sun():
        """Fetch the latest sports news from the Daily Sun."""
        logging.info("Getting Daily Sun Sports News...")
        fetch_news()
    @scheduler.task("interval", id="cleanse_daily_sun_db", seconds=604800)  # 1 week
    def cleanse_daily_sun_db():
        """Purge old Daily Sun articles from the database."""
        logging.info("Cleaning the Daily Sun database from old articles...")
        ArticleService.cleanse_old_articles()
    # Run both once at startup so news is available before the first interval fires.
    scrape_daily_sun()
    cleanse_daily_sun_db()
# Dev entry point; in production gunicorn imports `app` and this never runs.
if __name__ == "__main__":
    app.run(debug=True, host="0.0.0.0", port=8000)