# Streamlit front end for "Zamark: Secure Watermarking Service".
#
# The page is a sequence of button-driven steps: trigger-set generation,
# loading of the (pre-computed) model parameters, loading of the watermarked
# model parameters, and registration of the result in a JSON-backed blockchain.
#
# NOTE: this header is a comment rather than a module docstring on purpose, so
# that it can never be picked up and rendered on the page by Streamlit "magic".

import hashlib
import json
import os
import time
import uuid

import numpy as np
import streamlit as st
from concrete.ml.deployment import FHEModelClient
from concrete.ml.sklearn import SGDClassifier

import watermarking
from blockchain import Blockchain, print_blockchain_details
from utils import (
    CLIENT_DIR,
    CURRENT_DIR,
    DEPLOYMENT_DIR,
    KEYS_DIR,
    INPUT_BROWSER_LIMIT,
    clean_directory,
    SERVER_DIR,
)
from watermarking import watermark_model

# Value passed as `parameters_range` to every SGDClassifier built by this page.
PARAMETERS_RANGE = (-1.0, 1.0)

# JSON file the blockchain is loaded from and saved back to.
BLOCKCHAIN_PATH = "blockchain.json"


def generate_mock_hash():
    """Return the SHA-256 hex digest of the current timestamp string."""
    return hashlib.sha256(str(time.time()).encode()).hexdigest()


def todo():
    """Show a "not implemented" warning on the page."""
    st.warning("Not implemented yet", icon="⚠️")


def key_gen_fn(client_id):
    """Generate the FHE keys for a given user and show a preview of them.

    The evaluation key is written to ``KEYS_DIR / client_id / "evaluation_key"``.
    Per the original author's note, a model must already be present in
    DEPLOYMENT_DIR as "client.zip".

    Args:
        client_id (str): The client id, retrieved from streamlit.
    """
    clean_directory()

    client = FHEModelClient(path_dir=DEPLOYMENT_DIR, key_dir=KEYS_DIR / f"{client_id}")
    client.load()

    # Create the private and evaluation keys on the client side.
    client.generate_private_and_evaluation_keys()

    # Get the serialized evaluation keys.
    serialized_evaluation_keys = client.get_serialized_evaluation_keys()
    assert isinstance(serialized_evaluation_keys, bytes)

    # Save the evaluation key.
    evaluation_key_path = KEYS_DIR / f"{client_id}/evaluation_key"
    with evaluation_key_path.open("wb") as f:
        f.write(serialized_evaluation_keys)

    # Only a prefix of the hex dump is shown in the browser.
    serialized_evaluation_keys_shorten_hex = serialized_evaluation_keys.hex()[
        :INPUT_BROWSER_LIMIT
    ]

    with st.expander("Generated keys"):
        # Key size in MB, followed by the truncated hex preview.
        st.write(f"{len(serialized_evaluation_keys) / (10**6):.2f} MB")
        st.code(serialized_evaluation_keys_shorten_hex)

    st.success("Keys have been generated!", icon="✅")


# --- Disabled draft, kept for reference -------------------------------------
# def gen_trigger_set(client_id, hf_id):
#     # input : random images seeded by client_id
#     # labels : binary array of the id
#     watermark_uuid = uuid.uuid1()
#     hash = hashlib.sha256()
#     hash.update(client_id + str(watermark_uuid))
#     client_seed = hash.digest()
#     hash = hashlib.sha256()
#     hash.update(hf_id + str(watermark_uuid))
#     hf_seed = hash.digest()
#
#     trigger_set_size = 128
#
#     trigger_set_client = [
#         {"input": 1, "label": digit} for digit in encode_id(client_id, trigger_set_size)
#     ]
#
#     todo()
#
#
# def encode_id(ascii_rep, size=128):
#     """Encode a string id to a string of bits
#
#     Args:
#         ascii_rep (_type_): The id string
#         size (_type_): The size of the output bit string
#
#     Returns:
#         _type_: a string of bits
#     """
#     return "".join([format(ord(x), "b").zfill(8) for x in client_id])[:size]


def decode_id(binary_rep):
    """Decode a string of bits to an ASCII string.

    Args:
        binary_rep (str): The binary string, made of "0" and "1" characters.

    Returns:
        str: The decoded text.

    Raises:
        ValueError: If ``binary_rep`` is not a valid base-2 literal.
        UnicodeDecodeError: If the resulting bytes cannot be decoded.
    """
    # Parse the bit string as a base-2 integer.
    binary_int = int(binary_rep, 2)

    # Smallest number of bytes able to hold the value (ceiling division).
    # The parentheses matter: `bit_length() + 7 // 8` evaluates `7 // 8` first
    # (which is 0), yielding one byte per *bit* and a result left-padded with
    # NUL characters.
    byte_number = (binary_int.bit_length() + 7) // 8

    # Big-endian bytes, then text.
    binary_array = binary_int.to_bytes(byte_number, "big")
    return binary_array.decode()


# --- Disabled draft, kept for reference -------------------------------------
# def compare_id(client_id, binary_trigger_set_result):
#     """Compares the string id with the labels of the trigger set on the tested API
#
#     Args:
#         client_id (_type_): the ascii string
#         binary_trigger_set_result (_type_): the binary string
#
#     Returns:
#         _type_: _description_
#     """
#     ground_truth = encode_id(client_id, 128)
#
#     correct_bit = 0
#     for true_bit, real_bit in zip(ground_truth, binary_trigger_set_result):
#         if true_bit != real_bit:
#             correct_bit += 1
#
#     return correct_bit / len(binary_trigger_set_result)
#
#
# def watermark(model, trigger_set):
#     """Watermarking function
#
#     Args:
#         model (_type_): The model to watermark
#         trigger_set (_type_): the trigger set
#     """
#     X_trigger, y_trigger = trigger_set
#     watermarked_model = watermarking.watermark_model(model, X_trigger, y_trigger)
#
#     model_file_path = SERVER_DIR / "watermarked_model"
#     trigger_set_file_path = SERVER_DIR / "trigger_set"
#
#     # TODO: remove once model correctly watermarked "Reda continue"
#     model_file_path.touch()
#     trigger_set_file_path.touch()
#
#     # Once the model is watermarked and dumped to files (model + trigger set),
#     # the user can download them
#     with open(model_file_path, "rb") as model_file:
#         st.download_button(
#             label="Download the watermarked file",
#             data=model_file,
#             mime="application/octet-stream",
#         )
#     with open(trigger_set_file_path, "rb") as trigger_set_file:
#         st.download_button(
#             label="Download the trigger set",
#             data=trigger_set_file,
#             mime="application/octet-stream",
#         )


def _load_sgd_classifier(coef_path, intercept_path):
    """Build an SGDClassifier and attach parameters loaded from ``.npy`` files.

    Nothing is fitted here: ``coef_`` and ``intercept_`` are read from disk and
    assigned directly on the freshly constructed estimator.

    Args:
        coef_path (str): Path of the ``.npy`` file holding the coefficients.
        intercept_path (str): Path of the ``.npy`` file holding the intercept.

    Returns:
        SGDClassifier: The classifier with ``coef_`` and ``intercept_`` set.
    """
    classifier = SGDClassifier(
        random_state=42,
        max_iter=100,
        fit_encrypted=True,
        parameters_range=PARAMETERS_RANGE,
        penalty=None,
        learning_rate="constant",
        verbose=1,
    )
    classifier.coef_ = np.load(coef_path)
    classifier.intercept_ = np.load(intercept_path)
    return classifier


# ---------------------------------------------------------------------------
# Page layout
# ---------------------------------------------------------------------------

st.set_page_config(layout="wide")

st.sidebar.title("Contact")
st.sidebar.info(
    """
    - Reda Bellafqira
    - Mehdi Ben Ghali
    - Pierre-Elisée Flory
    - Mohammed Lansari
    - Thomas Winninger
    """
)

st.title("Zamark: Secure Watermarking Service")

# st.image(
#     "llm_watermarking.png",
#     caption="A Watermark for Large Language Models (https://doi.org/10.48550/arXiv.2301.10226)",
# )

# ---------------------------------------------------------------------------
# Step 1: trigger set and dataset generation
# ---------------------------------------------------------------------------

st.header("Client Configuration", divider=True)

# client_id = st.text_input("Identification string", "team-8-uuid")

X_trigger, y_trigger = None, None
if st.button("Generate the trigger set for the watermarking"):
    # Generate the trigger set and persist it (np.save appends ".npy").
    X_trigger, y_trigger = watermarking.gen_trigger_set()
    # watermarked_model = watermarking.watermark_model(model, X_trigger, y_trigger)
    np.save("x_trigger", X_trigger)
    np.save("y_trigger", y_trigger)

    # Generate the dataset and persist each split.
    x_train, y_train, x_test, y_test = watermarking.gen_database()
    np.save("x_train", x_train)
    np.save("y_train", y_train)
    np.save("x_test", x_test)
    np.save("y_test", y_test)

    # Show a success message.
    st.success("Trigger set generated and data saved successfully!")

    # Show additional information about what was generated.
    st.write(f"Trigger set shape: X={X_trigger.shape}, y={y_trigger.shape}")
    st.write(f"Training data shape: X={x_train.shape}, y={y_train.shape}")
    st.write(f"Test data shape: X={x_test.shape}, y={y_test.shape}")

# ---------------------------------------------------------------------------
# Step 2: model "training" (parameters are loaded from disk)
# ---------------------------------------------------------------------------

st.header("Model Training and Encryption", divider=True)

model, x_train, y_train, x_test, y_test = None, None, None, None, None
if st.button("Model Training and Encryption"):
    # Generate the dataset.
    x_train, y_train, x_test, y_test = watermarking.gen_database()

    # NOTE: no training happens here; stored parameters are loaded instead.
    # model = watermarking.train_model(x_train, y_train)
    model = _load_sgd_classifier("model_coef.npy", "model_intercept.npy")

    # Show a success message.
    st.success("Model training and encryption completed successfully!")

    # Show additional information.
    st.write("Model Information:")
    st.write(f"- Type: {type(model).__name__}")
    st.write(f"- Number of features: {model.coef_.shape[1]}")
    st.write(f"- Parameters range: {PARAMETERS_RANGE}")

    st.write("\nData Information:")
    st.write(f"- Training set shape: X={x_train.shape}, y={y_train.shape}")
    st.write(f"- Test set shape: X={x_test.shape}, y={y_test.shape}")

    # Preview of the model coefficients (first 5 rows of coef_).
    st.write("\nModel Coefficients Preview:")
    st.write(model.coef_[:5])

# ---------------------------------------------------------------------------
# Step 3: model watermarking (parameters are loaded from disk)
# ---------------------------------------------------------------------------

st.header("Model Watermarking", divider=True)

# if st.button("Model Watermarking"):
#     encrypted_model = st.file_uploader("Upload your encrypted model")

wat_model = None
if st.button("Model Watermarking"):
    # NOTE: no watermarking happens here; stored parameters are loaded instead.
    # watermark(None, None)
    # wat_model = watermarking.watermark_model(model, X_trigger, y_trigger)
    wat_model = _load_sgd_classifier("wat_model_coef.npy", "wat_model_intercept.npy")

    # Show a success message.
    st.success("Model watermarking completed successfully!")

    # Show information about the watermarked model.
    st.write("Watermarked Model Information:")
    st.write(f"- Type: {type(wat_model).__name__}")
    st.write(f"- Number of features: {wat_model.coef_.shape[1]}")
    st.write(f"- Parameters range: {PARAMETERS_RANGE}")

# --- Disabled draft, kept for reference -------------------------------------
# st.header("Watermarking evaluation", divider=True)
# parameters_range = (-1.0, 1.0)
# if st.button("Model Evaluation"):
#     wat_model = SGDClassifier(
#         random_state=42,
#         max_iter=100,
#         fit_encrypted=True,
#         parameters_range=parameters_range,
#         penalty=None,
#         learning_rate="constant",
#         verbose=1)
#
#     x_train = np.load("x_train.npy")
#     y_train = np.load("y_train.npy")
#     x_test = np.load("x_test.npy")
#     y_test = np.load("y_test.npy")
#
#     wat_model.coef_ = np.load("wat_model_coef.npy")
#     wat_model.intercept_ = np.load("wat_model_intercept.npy")
#
#     wat_model.fit(X_trigger, y_trigger, fhe="simulate")
#     wat_model.compile(x_train)
#     watermarking.evaluate(wat_model, x_train, y_train, x_test, y_test, X_trigger, y_trigger)

# ---------------------------------------------------------------------------
# Step 4: blockchain update
# ---------------------------------------------------------------------------

st.header("Update Blockchain", divider=True)

# The latest block is kept in session state so that it is still available on
# the script reruns that follow the button click.
if 'block_data' not in st.session_state:
    st.session_state.block_data = None

if st.button("Update Blockchain"):
    # UI boundary: any failure is reported on the page instead of crashing it.
    try:
        # Load the blockchain from the JSON file.
        loaded_blockchain, _data = Blockchain.load_from_file(BLOCKCHAIN_PATH)

        # Check that the loaded blockchain is valid.
        is_valid = loaded_blockchain.is_chain_valid()
        st.write(f"Loaded blockchain is valid: {is_valid}")

        if not is_valid:
            st.warning("The loaded blockchain is not valid. Please check data integrity.")
        else:
            wat_model = _load_sgd_classifier(
                "wat_model_coef.npy", "wat_model_intercept.npy"
            )

            X_trigger = np.load("x_trigger.npy")
            y_trigger = np.load("y_trigger.npy")

            watermarked_model_hash = watermarking.get_model_hash(wat_model)
            # NOTE(review): both trigger-set hashes are computed from the same
            # arrays, so they are identical here — confirm this is intended.
            trigger_set_hf = watermarking.get_trigger_hash(X_trigger, y_trigger)
            trigger_set_client = watermarking.get_trigger_hash(X_trigger, y_trigger)

            # Add a new block to the loaded blockchain.
            new_block = loaded_blockchain.add_block(
                trigger_set_hf, trigger_set_client, watermarked_model_hash
            )

            # Save the updated blockchain.
            loaded_blockchain.save_to_file(BLOCKCHAIN_PATH)

            # Remember the new block across reruns.
            st.session_state.block_data = new_block.to_dict()

            st.success("Blockchain updated successfully!")

            # Display information about the new block.
            st.subheader("New Block Information")
            st.write(f"Block ID: {new_block.counter}")
            st.write(f"Timestamp: {new_block.timestamp}")
            st.write(f"Previous Hash: {new_block.previous_hash}")
            st.write(f"Current Hash: {new_block.hash}")

            # Display blockchain statistics.
            st.subheader("Blockchain Statistics")
            st.write(f"Total Blocks: {len(loaded_blockchain.chain)}")
            st.write(
                f"Blockchain File Size: {os.path.getsize(BLOCKCHAIN_PATH) / 1024:.2f} KB"
            )

    except Exception as e:
        st.error(f"An error occurred while updating the blockchain: {e}")

# Display the JSON of the latest block, if one was stored in this session.
if st.session_state.block_data:
    st.subheader("Latest Block Data (JSON)")

    # Convert the data to a formatted JSON string and display it.
    block_json = json.dumps(st.session_state.block_data, indent=2)
    st.code(block_json, language='json')

    # Option to download the entire blockchain.
    st.subheader("Download Blockchain")
    with open(BLOCKCHAIN_PATH, "rb") as file:
        st.download_button(
            label="Download Blockchain JSON",
            data=file,
            file_name="blockchain.json",
            mime="application/json",
        )