utilities

Running

App Files Files Community

Reggie committed on Aug 13, 2023

Commit

0768472

•

1 Parent(s): bb59f5d

Update app.py

Browse files

Files changed (1) hide show

app.py +242 -53

app.py CHANGED Viewed

@@ -1,60 +1,249 @@
 import os
-import requests
 import json
-from io import BytesIO
-from flask import Flask, jsonify, render_template, request, send_file
-from modules.inference import infer_t5
-from modules.dataset import query_emotion
-# https://huggingface.co/settings/tokens
-# https://huggingface.co/spaces/{username}/{space}/settings
-API_TOKEN = os.getenv("BIG_GAN_TOKEN")
-app = Flask(__name__)
-@app.route("/")
-def index():
-    return render_template("index.html")
-@app.route("/infer_biggan")
-def biggan():
-    input = request.args.get("input")
-    output = requests.request(
-        "POST",
-        "https://api-inference.huggingface.co/models/osanseviero/BigGAN-deep-128",
-        headers={"Authorization": f"Bearer {API_TOKEN}"},
-        data=json.dumps(input),
     )
-    return send_file(BytesIO(output.content), mimetype="image/png")
-@app.route("/infer_t5")
-def t5():
-    input = request.args.get("input")
-    output = infer_t5(input)
-    return jsonify({"output": output})
-@app.route("/query_emotion")
-def emotion():
-    start = request.args.get("start")
-    end = request.args.get("end")
-    print(start)
-    print(end)
-    output = query_emotion(int(start), int(end))
-    return jsonify({"output": output})
 if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=7860)

+from authlib.integrations.flask_client import OAuth
+from authlib.common.security import generate_token
+import ffmpeg
+from flask import Flask, render_template, request, jsonify, url_for, redirect, session
+from functools import wraps
 import os
+import streamlink
+import threading
+import time
+from faster_whisper import WhisperModel
+import subprocess
+from datetime import datetime as dt
+from datetime import timedelta, timezone
+from apiclient import discovery
+from google.oauth2 import service_account
 import json
+# Secrets and deployment settings, all read from environment variables
+client_secret = os.environ.get("client_secret")  # presumably the service-account key as JSON text - confirm
+gdoc_id = os.environ.get("gdoc_id")  # document that update_gdoc() appends to
+GOOGLE_CLIENT_SECRET = os.environ.get("GOOGLE_CLIENT_SECRET")
+GOOGLE_CLIENT_ID = os.environ.get("GOOGLE_CLIENT_ID")
+# NOTE(review): allowed_users is read here but not consulted anywhere in the visible code - confirm where it is enforced
+allowed_users = os.environ.get("allowed_users")
+# Faster Whisper setup
+model_size = 'small'
+beamsize = 2
+wmodel = WhisperModel(model_size, device="cpu", compute_type="int8")
+# Make sure transcription_files exists and starts out empty
+if not os.path.exists('transcription_files'): os.makedirs('transcription_files')
+for f in os.listdir('transcription_files/'): os.remove(os.path.join('transcription_files/', f)) # clear any old files in transcription_files folder
+# Environment values are strings. Dumping the string as-is would write one quoted JSON string
+# instead of a JSON object, so decode it first; fall back to the raw value if it is not JSON.
+try:
+    client_secret_info = json.loads(client_secret) if isinstance(client_secret, str) else client_secret
+except ValueError:
+    client_secret_info = client_secret
+with open("client_secret.json", "w") as json_file: json.dump(client_secret_info, json_file, indent=4)
+scopes = ["https://www.googleapis.com/auth/documents", "https://www.googleapis.com/auth/drive.file"]
+credentials = service_account.Credentials.from_service_account_file('client_secret.json', scopes=scopes)
+service = discovery.build('docs', 'v1', credentials=credentials)
+local_tz = 5.5 # Hours added to UTC when writing timestamps into the transcript
+local_transcript = 'transcription_files/tr.txt' # Transcript text; its existence also marks a recording in progress
+pid_file = 'transcription_files/pid.txt' # Holds the pid of the most recently started ffmpeg process
+# Check if mp3 folder exists, and create it if it doesn't
+if not os.path.exists('mp3'): os.makedirs('mp3')
+# Delete any old files in mp3 folder
+for f in os.listdir('mp3/'): os.remove(os.path.join('mp3/', f))
+app = Flask(__name__, static_url_path='/static')
+# NOTE(review): the key is regenerated on every start, so existing session cookies stop validating after a restart
+app.secret_key = os.urandom(12)
+oauth = OAuth(app)
+# Recorder state shared between the request handlers and the background thread
+stream_process = None # threading.Thread running save_audio, or None
+recording = False # True while save_audio should keep looping
+mp3_extraction_process = None # Handle of the ffmpeg process writing the current chunk, or None
+def update_gdoc(text, gdoc_id): # Update contents of google doc
+    """Append ``text`` to the end of the Google Doc identified by ``gdoc_id``.
+
+    The document is fetched, the ``endIndex`` of its last paragraph element is
+    looked up, and a space followed by ``text`` is inserted one position before
+    that index. The whole operation is best-effort: any failure is printed
+    instead of raised, so a Docs problem does not end the caller's loop.
+
+    Args:
+        text: Text to append.
+        gdoc_id: ID of the target document.
+    """
+    print('Updating Google Doc', gdoc_id)
+    try:
+        doc = service.documents().get(documentId=gdoc_id).execute()
+        paragraph_ends = [p['endIndex'] for p in doc['body']['content'] if 'paragraph' in p]
+        if not paragraph_ends:
+            # Nothing to anchor the insertion to; skip rather than fail on [-1]
+            print('No paragraph found in Google Doc', gdoc_id)
+            return
+        endindex = paragraph_ends[-1]
+        # Insert just before the last paragraph's end (presumably its trailing newline - confirm)
+        body = {'requests': [{'insertText': {'location': {'index': endindex-1,}, 'text': ' ' + text}}]}
+        result = service.documents().batchUpdate(documentId=gdoc_id, body=body).execute()
+        print(result)
+    except Exception as e:
+        print(e)
+def process_complete_callback(retcode, **kwargs):
+    """Print whether an FFmpeg run succeeded, judged by its exit code.
+
+    A ``retcode`` of 0 is reported as success and any other value as an
+    error. Extra keyword arguments are accepted and ignored.
+    """
+    succeeded = retcode == 0
+    message = "FFmpeg process completed successfully!" if succeeded else "FFmpeg process encountered an error."
+    print(message)
+def transcribe_audio(latest_file, time_counter):
+    """Transcribe one audio file and return ``(text, time_counter + 1)``.
+
+    The segment texts produced by the Whisper model are concatenated, newlines
+    become spaces and double spaces are collapsed once. When ``time_counter``
+    is a multiple of 5, a ``Time HH:MM:SS`` line (UTC shifted by ``local_tz``
+    hours) is placed after the first ``'. '``-delimited piece of the text, so
+    that piece stays attached to the preceding paragraph.
+    """
+    print('transcribing ', latest_file)
+    segments, _info = wmodel.transcribe(f"{latest_file}", beam_size=beamsize)
+    joined = ''.join(segment.text for segment in segments)
+    transcribed = joined.replace('\n', ' ').replace('  ', ' ')
+    stamp_due = time_counter % 5 == 0
+    if stamp_due:
+        # Split off the leading piece so the time code lands after it
+        leading, *remaining = transcribed.split('. ')
+        local_now = dt.now(timezone.utc) + timedelta(hours=local_tz)
+        transcribed = leading + '\nTime ' + str(local_now.strftime('%H:%M:%S')) + '\n' + '. '.join(remaining)
+    return transcribed, time_counter + 1
+def save_audio(youtube_url):
+    """Record a stream as 30-second MP3 chunks and transcribe the finished ones.
+
+    Runs until the module-level ``recording`` flag turns false. Each pass
+    starts an ffmpeg process that writes the next chunk into ``mp3/`` and
+    stores its pid in ``pid_file``. If another chunk is already in the folder
+    it is transcribed, deleted, appended to the Google Doc and to
+    ``local_transcript``; the pass is then padded to about 30 seconds.
+
+    Args:
+        youtube_url: URL handed to streamlink to resolve the stream.
+
+    Returns:
+        ``None`` when the loop ends because ``recording`` became false, or the
+        error message string if anything raised. In the error case
+        ``recording`` is reset and the ``local_transcript`` lock file is
+        removed so that a new recording can be started.
+    """
+    global recording, mp3_extraction_process
+    try:
+        streams = streamlink.streams(youtube_url)
+        # Only the audio track is kept; "144p" is presumably picked to keep bandwidth low - confirm
+        stream_url = streams["144p"].url
+        time_counter = 0
+        while recording:
+            # Save audio only into mp3 files
+            saved_mp3 = f"mp3/audio_{int(time.time())}.mp3"
+            mp3_extraction_process = (
+                ffmpeg
+                .input(stream_url, t=30)
+                .audio
+                # TODO - change destination url to relevant url
+                .output(saved_mp3)
+                .overwrite_output()
+                .global_args('-loglevel', 'panic')
+                .run_async()
+                )
+            print('pid', mp3_extraction_process.pid)
+            # write the pid to pid_file
+            with open(pid_file, 'w') as f: f.write(str(mp3_extraction_process.pid))
+            # If there is more than one mp3 file in the folder, transcribe the one that is not being written to.
+            # Sorting makes the choice deterministic: the names embed the start time, so the oldest chunk goes first.
+            mp3files = sorted(f for f in os.listdir('mp3') if f.endswith('.mp3'))
+            if len(mp3files) < 2:
+                print('Sleeping for 30s as only one mp3 file in folder')
+                time.sleep(30)
+            else:
+                starttime = time.time()
+                file_to_transcribe = [f for f in mp3files if f != os.path.basename(saved_mp3)][0]
+                print('Working on ', file_to_transcribe)
+                transcribed, time_counter = transcribe_audio(f'mp3/{file_to_transcribe}', time_counter)
+                os.remove(f'mp3/{file_to_transcribe}')
+                update_gdoc(transcribed, gdoc_id)
+                with open(local_transcript, 'a', encoding='utf-8', errors='ignore') as f: f.write(transcribed)
+                elapsed_time = time.time() - starttime
+                print('Time to transcribe:', elapsed_time, 'seconds')
+                if elapsed_time < 30:
+                    print(f'Sleeping for {30-elapsed_time} as there are more than one mp3 files in folder')
+                    time.sleep(30-elapsed_time)
+    except Exception as e:
+        recording = False
+        # The transcript file doubles as the "recording in progress" lock checked by /start_process.
+        # Drop it here, otherwise no new recording could be started after a failure.
+        try:
+            if os.path.isfile(local_transcript): os.remove(local_transcript)
+        except OSError as cleanup_error:
+            print('exception', str(cleanup_error))
+        print('exception', str(e))
+        return str(e)
+@app.route("/start_process", methods=["POST"])
+def start_process():
+    """Start recording the stream whose URL is posted in the ``url`` form field.
+
+    Responds 400 if the transcript lock file already exists, if no URL was
+    supplied, or if the ``recording`` flag is already set. Otherwise creates
+    the lock file, launches ``save_audio`` in a background thread and
+    responds 200.
+    """
+    global recording, stream_process
+    if os.path.isfile(local_transcript):
+        return jsonify({"message": "Recording is already in progress."}), 400
+    youtube_url = request.form.get("url")
+    if not youtube_url:
+        return jsonify({"message": "Please provide a valid YouTube URL."}), 400
+    if recording:
+        return jsonify({"message": "A recording is already in progress."}), 400
+    # Create the local transcript file, which is used as a check to prevent multiple recordings.
+    # This happens only after the request has been accepted, so a rejected request cannot leave the lock behind.
+    with open(local_transcript, 'a', encoding='utf-8', errors='ignore') as f: f.write('')
+    print('In start process')
+    recording = True
+    stream_process = threading.Thread(target=save_audio, args=(youtube_url,))
+    stream_process.start()
+    return jsonify({"message": "Recording started."}), 200
+@app.route("/stop_process", methods=["POST"])
+def stop_process():
+    """Stop the current recording and clean up its files and processes.
+
+    Responds 400 when the ``recording`` flag is not set. Otherwise clears the
+    flag, waits for the recorder thread, terminates the current ffmpeg
+    process, empties ``mp3/``, removes the transcript lock file, kills the pid
+    stored in ``pid_file`` and responds 200.
+    """
+    global recording, stream_process, mp3_extraction_process
+    if not recording:
+        return jsonify({"message": "No recording is currently in progress."}), 400
+    print('In stop process')
+    recording = False
+    # Join before terminating: the thread may still start one more ffmpeg process before it notices the flag
+    if stream_process is not None:
+        stream_process.join()
+        stream_process = None
+    # The thread can finish without ever having started ffmpeg, so the handle may still be None
+    if mp3_extraction_process is not None:
+        mp3_extraction_process.terminate()
+        mp3_extraction_process = None
+    for f in os.listdir('mp3/'): os.remove(os.path.join('mp3/', f))
+    if os.path.isfile(local_transcript): os.remove(local_transcript)
+    # check if pid_file exists, get the pid inside it and convert to int, and use os.kill to kill it
+    if os.path.isfile(pid_file):
+        with open(pid_file, 'r') as f: pid_text = f.read().strip()
+        try:
+            pid = int(pid_text)
+        except ValueError:
+            pid = None # empty or corrupt pid file: nothing to kill
+        if pid is not None:
+            try: os.kill(pid, 9) # For linux
+            except ProcessLookupError:
+                pass # the process has already exited
+            except OSError:
+                try:
+                    process = subprocess.Popen(["taskkill", "/F", "/PID", str(pid)], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # For Windows
+                    process.communicate()
+                    print("Process terminated successfully.")
+                except Exception as e:
+                    print("Error:", e)
+        os.remove(pid_file)
+    return jsonify({"message": "Recording stopped."}), 200
+@app.route('/google/')
+def google():
+    """Begin the Google login flow.
+
+    Registers the ``google`` OAuth client, stores a fresh nonce in the
+    session and redirects the browser to Google, with ``google_auth`` as the
+    callback.
+    """
+    # NOTE(review): the client is registered per request, so oauth.google exists only after this route has run
+    discovery_url = 'https://accounts.google.com/.well-known/openid-configuration'
+    oauth.register(
+        name='google',
+        client_id=GOOGLE_CLIENT_ID,
+        client_secret=GOOGLE_CLIENT_SECRET,
+        server_metadata_url=discovery_url,
+        client_kwargs={"scope": "openid email profile"}
     )
+    # Google sends the user back to the google_auth view
+    callback_url = url_for('google_auth', _external=True)
+    nonce = generate_token()
+    session['nonce'] = nonce
+    return oauth.google.authorize_redirect(callback_url, nonce=nonce)
+@app.route('/google/auth/')
+def google_auth():
+    """Handle the OAuth callback from Google and store the user in the session.
+
+    Exchanges the callback for an access token, parses the ID token against
+    the nonce saved by ``google`` and puts the result in ``session['user']``,
+    then redirects to ``/home``. If the session holds no nonce (the callback
+    was opened without starting a login, or the session was lost), the
+    visitor is sent back to ``/login`` instead of hitting a ``KeyError``.
+    """
+    nonce = session.get('nonce')
+    if nonce is None:
+        return redirect('/login')
+    token = oauth.google.authorize_access_token()
+    user = oauth.google.parse_id_token(token, nonce=nonce)
+    # NOTE(review): any Google account is accepted here; allowed_users is not checked - confirm intended
+    session['user'] = user
+    print('USER', user)
+    # Redirect to home if login successful
+    return redirect('/home')
+def is_not_logged_in():
+    """Return True when the session lacks a ``user`` or a ``nonce`` entry."""
+    required_keys = ('user', 'nonce')
+    return any(session.get(key) is None for key in required_keys)
+# Decorator for protected URLs: lets the view run only for a logged-in session
+def login_required(f):
+    """Wrap view ``f`` so visitors who are not logged in are redirected to ``/login``."""
+    @wraps(f)
+    def guarded_view(*args, **kwargs):
+        return redirect('/login') if is_not_logged_in() else f(*args, **kwargs)
+    return guarded_view
+@app.route("/home")
+@login_required
+def home():
+    """Render the home page; ``login_required`` redirects anonymous visitors."""
+    home_page = render_template("home.html")
+    return home_page
+@app.route("/", methods=["GET"])
+@app.route("/login", methods=["GET"])
+def login():
+    """Render the login page, or redirect to ``/home`` when already logged in."""
+    if is_not_logged_in():
+        return render_template("login.html")
+    return redirect("/home")
 if __name__ == "__main__":
+    # The server listens on all interfaces, and debug mode exposes Werkzeug's interactive
+    # debugger (arbitrary code execution) to anyone who can reach it. Debug is therefore
+    # opt-in: set FLASK_DEBUG=1 for local development.
+    app.run(host="0.0.0.0", debug=os.environ.get("FLASK_DEBUG") == "1", port=8081)