Danialebrat commited on
Commit
822ac98
·
1 Parent(s): 240a85a

Deploying new UI for AI messaging system

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .dockerignore +7 -7
  2. .gitignore +0 -8
  3. .idea/AI_Message_Generator.iml +4 -2
  4. .idea/misc.xml +1 -1
  5. .streamlit/config.toml +0 -3
  6. CIO/CIO_integration_Python.py +0 -146
  7. Config_files/message_system_config.json +0 -35
  8. Config_files/singeo_phrases.txt +0 -25
  9. Data/Singeo_Camp.csv +0 -2423
  10. Data/not_active_drumeo_camp.csv +0 -0
  11. Dockerfile +0 -37
  12. Messaging_system/Message_generator_2.py +0 -253
  13. Messaging_system/MultiMessage.py +0 -406
  14. Messaging_system/MultiMessage_2.py +0 -412
  15. Messaging_system/Permes.py +0 -202
  16. Messaging_system/PromptEng.py +0 -268
  17. Messaging_system/PromptGenerator_2.py +0 -446
  18. Messaging_system/SnowFlakeConnection.py +0 -262
  19. Messaging_system/context_validator.py +0 -302
  20. Messaging_system/protection_layer.py +0 -143
  21. Messaging_system/sending_time.py +0 -69
  22. README.md +0 -16
  23. Singeo_camp.csv +0 -0
  24. ai_messaging_system_v2/Data/test_camp.json +159 -0
  25. ai_messaging_system_v2/Data/test_staff.csv +11 -0
  26. ai_messaging_system_v2/Data/ui_output/.gitkeep +3 -0
  27. ai_messaging_system_v2/Data/ui_output/message_cost.csv +7 -0
  28. ai_messaging_system_v2/Data/ui_output/messages_a_drumeo_20260111_2039.csv +0 -0
  29. ai_messaging_system_v2/Data/ui_output/messages_b_drumeo_20260111_2039.csv +61 -0
  30. {Messaging_system → ai_messaging_system_v2/Messaging_system}/CoreConfig.py +27 -44
  31. {Messaging_system → ai_messaging_system_v2/Messaging_system}/DataCollector.py +264 -43
  32. {Messaging_system → ai_messaging_system_v2/Messaging_system}/Homepage_Recommender.py +0 -0
  33. {Messaging_system → ai_messaging_system_v2/Messaging_system}/LLM.py +226 -291
  34. {Messaging_system → ai_messaging_system_v2/Messaging_system}/LLMR.py +187 -33
  35. {Messaging_system → ai_messaging_system_v2/Messaging_system}/Message_generator.py +207 -90
  36. ai_messaging_system_v2/Messaging_system/Permes.py +412 -0
  37. {Messaging_system → ai_messaging_system_v2/Messaging_system}/PromptGenerator.py +112 -82
  38. ai_messaging_system_v2/Messaging_system/agents/README.md +518 -0
  39. ai_messaging_system_v2/Messaging_system/agents/__init__.py +20 -0
  40. ai_messaging_system_v2/Messaging_system/agents/agent_orchestrator.py +234 -0
  41. ai_messaging_system_v2/Messaging_system/agents/base_agent.py +82 -0
  42. ai_messaging_system_v2/Messaging_system/agents/generator_agent.py +470 -0
  43. ai_messaging_system_v2/Messaging_system/agents/rejection_logger.py +209 -0
  44. ai_messaging_system_v2/Messaging_system/agents/security_agent.py +459 -0
  45. ai_messaging_system_v2/README.md +489 -0
  46. ai_messaging_system_v2/UI_MODE_GUIDE.md +495 -0
  47. ai_messaging_system_v2/configs/README.md +363 -0
  48. Messaging_system/StoreLayer.py → ai_messaging_system_v2/configs/__init__.py +0 -0
  49. ai_messaging_system_v2/configs/config_loader.py +208 -0
  50. ai_messaging_system_v2/configs/drumeo/__init__.py +0 -0
.dockerignore CHANGED
@@ -1,8 +1,8 @@
1
- # Ignore the .streamlit directory and its contents
2
- Config_files/mysql_credentials.json
3
- Config_files/secrets.json
4
- Config_files/snowflake_credentials_Danial.json
5
- .streamlit/secrets.toml
6
-
7
- # Ignore the .env file
8
  .env
 
 
 
 
1
+ **/__pycache__/
2
+ **/*.pyc
3
+ .git
4
+ .gitignore
 
 
 
5
  .env
6
+ *.log
7
+ dist
8
+ build
.gitignore DELETED
@@ -1,8 +0,0 @@
1
- # Ignore the .streamlit directory and its contents
2
- Config_files/mysql_credentials.json
3
- Config_files/secrets.json
4
- Config_files/snowflake_credentials_Danial.json
5
- .streamlit/secrets.toml
6
-
7
- # Ignore the .env file
8
- .env
 
 
 
 
 
 
 
 
 
.idea/AI_Message_Generator.iml CHANGED
@@ -1,8 +1,10 @@
1
  <?xml version="1.0" encoding="UTF-8"?>
2
  <module type="PYTHON_MODULE" version="4">
3
  <component name="NewModuleRootManager">
4
- <content url="file://$MODULE_DIR$" />
5
- <orderEntry type="jdk" jdkName="Python 3.9" jdkType="Python SDK" />
 
 
6
  <orderEntry type="sourceFolder" forTests="false" />
7
  </component>
8
  </module>
 
1
  <?xml version="1.0" encoding="UTF-8"?>
2
  <module type="PYTHON_MODULE" version="4">
3
  <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$">
5
+ <excludeFolder url="file://$MODULE_DIR$/venv" />
6
+ </content>
7
+ <orderEntry type="jdk" jdkName="Python 3.9 (musora-machine-learning-messaging-project)" jdkType="Python SDK" />
8
  <orderEntry type="sourceFolder" forTests="false" />
9
  </component>
10
  </module>
.idea/misc.xml CHANGED
@@ -3,5 +3,5 @@
3
  <component name="Black">
4
  <option name="sdkName" value="Python 3.9 (AI_Message_Generator)" />
5
  </component>
6
- <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9" project-jdk-type="Python SDK" />
7
  </project>
 
3
  <component name="Black">
4
  <option name="sdkName" value="Python 3.9 (AI_Message_Generator)" />
5
  </component>
6
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (musora-machine-learning-messaging-project)" project-jdk-type="Python SDK" />
7
  </project>
.streamlit/config.toml DELETED
@@ -1,3 +0,0 @@
1
- [server]
2
- enableXsrfProtection = false
3
- enableCORS = false
 
 
 
 
CIO/CIO_integration_Python.py DELETED
@@ -1,146 +0,0 @@
1
- import http.client
2
- import json
3
- import pandas as pd
4
- import logging
5
- import base64
6
- import requests
7
- from customerio import CustomerIO, Regions
8
-
9
-
10
- class CustomerIOIntegration:
11
- def __init__(self, site_id, api_key):
12
-
13
- self.cio = CustomerIO(site_id=site_id, api_key=api_key)
14
- logging.basicConfig(level=logging.INFO)
15
-
16
- # Authentication
17
- self.site_id = site_id
18
- self.api_key = api_key
19
- # Base URL for Customer.io App API endpoints (used for segments management)
20
- self.base_url = "https://api.customer.io/v1"
21
-
22
- # Create Basic Auth header
23
- auth_b64 = base64.b64encode(f"{self.site_id}:{self.api_key}".encode('utf-8')).decode('utf-8')
24
- self.headers = {
25
- "Authorization": f"Basic {auth_b64}",
26
- "Content-Type": "application/json"
27
- }
28
-
29
- def add_attributes(self, dataframe):
30
-
31
- # Filter out rows without messages or cio_id
32
- filtered_df = dataframe.dropna(subset=['ai_generated_message', 'email'])
33
-
34
- for index, row in filtered_df.iterrows():
35
- try:
36
- self.cio.identify(id=row['email'], ai_generated_message=row['ai_generated_message'])
37
- logging.info(f"Successfully updated user {row['email']} with message")
38
- except Exception as e:
39
- logging.error(f"Failed to update user {row['email']}: {e}")
40
-
41
- def get_segment(self, segment_name):
42
-
43
- # Step 1: Check if the segment exists
44
- resp = requests.get(f"{self.base_url}/segments", headers=self.headers)
45
- if resp.status_code != 200:
46
- raise Exception(f"Error fetching segments: {resp.text}")
47
-
48
- segments = resp.json() # assuming a list of segments is returned
49
- segment_id = None
50
- for seg in segments:
51
- if seg.get("name") == segment_name:
52
- segment_id = seg.get("id")
53
- break
54
- return segment_id
55
-
56
- def update_segment_from_dataframe(self, df: pd.DataFrame,
57
- segment_name: str,
58
- segment_description: str) -> str:
59
- """
60
- Given a pandas DataFrame, create (if needed) and update a Customer.io manual segment.
61
-
62
- The DataFrame must contain an "email" column (used as the unique identifier) plus other columns
63
- that become customer attributes.
64
-
65
- Parameters:
66
- df: DataFrame containing customer data.
67
- segment_name: The name of the segment to create or update.
68
- segment_description: A description for the segment (used when creating it).
69
-
70
- Returns:
71
- The segment ID (as returned by the API).
72
- """
73
- segment_id = self.get_segment(segment_name)
74
-
75
- # If segment does not exist, create it
76
- if segment_id is None:
77
- payload = {
78
- "name": segment_name,
79
- "description": segment_description,
80
- "type": "manual" # manual segments require that you add customers explicitly
81
- }
82
- resp = requests.post(f"{self.base_url}/segments", headers=self.headers, data=json.dumps(payload))
83
- if resp.status_code not in (200, 201):
84
- raise Exception(f"Error creating segment: {resp.text}")
85
- segment = resp.json()
86
- segment_id = segment.get("id")
87
- print(f"Segment '{segment_name}' created with ID: {segment_id}")
88
- else:
89
- print(f"Segment '{segment_name}' already exists with ID: {segment_id}")
90
-
91
- # Step 2: For each row in the DataFrame, update the customer profile.
92
- # We use the "email" column as the id.
93
- for index, row in df.iterrows():
94
- email = row["email"]
95
- # Prepare a dictionary of attributes (all columns except email)
96
- attrs = row.drop("email").to_dict()
97
- # Use the customer.io client to create or update the profile.
98
- # Note: any keyword argument you pass becomes a custom attribute.
99
- self.cio.identify(id=email, **attrs)
100
-
101
- # Step 3: Add all customers (emails) from the DataFrame to the segment.
102
- customer_ids = df["email"].tolist()
103
- payload = {
104
- "ids": customer_ids,
105
- "id_type": "email" # since we use emails as the identifier
106
- }
107
- resp = requests.put(f"{self.base_url}/segments/{segment_id}", headers=self.headers, data=json.dumps(payload))
108
- if resp.status_code != 200:
109
- raise Exception(f"Error adding customers to segment: {resp.text}")
110
-
111
- print(f"Successfully updated segment '{segment_name}' with {len(customer_ids)} customers.")
112
- return segment_id
113
-
114
-
115
- def load_config_(file_path):
116
- """
117
- Loads configuration JSON files from the local space. (mostly for loading the Snowflake connection parameters)
118
- :param file_path: local path to the JSON file
119
- :return: JSON file
120
- """
121
- with open(file_path, 'r') as file:
122
- return json.load(file)
123
-
124
-
125
- # Example usage
126
- if __name__ == "__main__":
127
- data = pd.DataFrame({'email': ['[email protected]'],
128
- 'message': ['This is the second test message'],
129
- 'json_att': [{"message": "test", "url": "test"}]})
130
- df = pd.DataFrame(data)
131
-
132
- secrets_file = 'Config_files/secrets.json'
133
- secrets = load_config_(secrets_file)
134
-
135
- track_api_key = secrets["MUSORA_CUSTOMER_IO_TRACK_API_KEY"]
136
- site_id = secrets["MUSORA_CUSTOMER_IO_SITE_ID"]
137
- api_key = secrets["MUSORA_CUSTOMER_IO_APP_API_KEY"]
138
- workspace_id = secrets["MUSORA_CUSTOMER_IO_WORKSPACE_ID"]
139
-
140
- cio_integration = CustomerIOIntegration(api_key=track_api_key, site_id=site_id)
141
-
142
- # Update (or create) the segment
143
- segment_id = cio_integration.update_segment_from_dataframe(df,
144
- segment_name="Danial_ Manual Segment _ AI",
145
- segment_description="Customers imported from DataFrame")
146
- print(f"Segment ID: {segment_id}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Config_files/message_system_config.json DELETED
@@ -1,35 +0,0 @@
1
- {
2
- "user_info_features": [
3
- "first_name",
4
- "country",
5
- "instrument",
6
- "biography",
7
- "birthday_reminder",
8
- "topics",
9
- "genres",
10
- "last_completed_content"
11
- ],
12
- "interaction_features": ["last_content_info"],
13
- "check_feasibility": [
14
- "first_name",
15
- "biography",
16
- "birthday",
17
- "topics",
18
- "genres"
19
- ],
20
- "AI_Jargon": ["elevate", "enhance", "ignite", "reignite", "rekindle", "rediscover","passion", "boost", "fuel", "thrill", "revive", "spark", "performing", "fresh", "tone", "enthusiasm", "illuminate"],
21
- "singeo_banned_phrases": " Banned phrases:\n Voice is NOT an instrument, so avoid phrases like below:\n - Your voice is waiting\n - Your voice awaits\n - Your voice needs you\n - Your voice is calling\n - Your voice deserves more\n - Hit the high notes / Hit those notes\n - ...\n",
22
- "AI_phrases_singeo": ["your voice deserves more"],
23
- "header_limit": 30,
24
- "message_limit": 135,
25
- "LLM_models": ["gpt-4o-mini", "gpt-5-mini", "gpt-5-nano", "gemini-2.5-flash", "gemini-2.0-flash","claude-3-5-haiku-latest", "google/gemma-3-27b-instruct/bf-16"],
26
- "openai_models": ["gpt-4o-mini", "gpt-4o", "gpt-4.1-nano", "gpt-3.5-turbo", "gpt-4.1-mini", "gpt-5-mini", "gpt-5-nano"],
27
- "reasoning": ["o1", "o4-mini", "o1-mini", "o3-mini", "gpt-5-mini", "gpt-5-nano"],
28
- "ollama_models": ["deepseek-r1:1.5b", "gemma3:4b", "deepseek-r1:7b", "gemma3:4b"],
29
- "claude_models": ["claude-3-5-haiku-latest"],
30
- "inference_models": ["google/gemma-3-27b-instruct/bf-16", "meta-llama/llama-3.2-11b-instruct/fp-16"],
31
- "google_models": ["gemini-2.5-flash-lite", "gemini-2.5-flash", "gemini-2.0-flash"]
32
- }
33
-
34
-
35
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Config_files/singeo_phrases.txt DELETED
@@ -1,25 +0,0 @@
1
- Let’s get our vocal cords warmed up.
2
- Time to dive back into vocal practice.
3
- Let’s work on finding your true voice.
4
- Just relax, breathe, and sing.
5
- It’s time to practice our vocal exercises.
6
- Let’s sing something together.
7
- Let’s get those voices sounding fantastic.
8
- We’ll start with some easy warm-ups to get comfortable.
9
- Don’t worry about sounding perfect – just relax and enjoy the feeling of singing.
10
- Let’s unlock the power of your voice.
11
- Get comfortable and confident with your own voice.
12
- Before long you’ll be singing confidently.
13
- It’s time to begin developing your own unique voice.
14
- At the end of this practice you will start to feel the difference in your voice.
15
- With a little practice, you will be confidently singing the songs you love.
16
- The voice is a muscle, and it’s important to train it properly.
17
- With good habits and practice, singing becomes more effortless.
18
- It’s time to achieve vocal freedom.
19
- Get comfortable and confident with your own voice.
20
- If you sing every day, you’ll start to notice improvements very soon.
21
- Get nice and loose for this vocal warm-up.
22
- A good practice habit is the best way to quickly get better at singing.
23
- Start small and build a practice habit from there.
24
- When you finish this practice, take a minute to celebrate your success.
25
- Let’s have fun with these vocal warm-up routines.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Data/Singeo_Camp.csv DELETED
@@ -1,2423 +0,0 @@
1
- user_id
2
- 823594
3
- 490901
4
- 515383
5
- 738659
6
- 405746
7
- 826858
8
- 830514
9
- 708823
10
- 700096
11
- 489602
12
- 807335
13
- 846269
14
- 456315
15
- 800896
16
- 609793
17
- 826307
18
- 613971
19
- 825354
20
- 669861
21
- 523917
22
- 735710
23
- 700202
24
- 840829
25
- 734243
26
- 763676
27
- 643793
28
- 818742
29
- 302590
30
- 499800
31
- 716448
32
- 769525
33
- 847737
34
- 479101
35
- 540891
36
- 446726
37
- 363055
38
- 743017
39
- 848335
40
- 705473
41
- 701070
42
- 495042
43
- 656355
44
- 779786
45
- 676878
46
- 794869
47
- 844044
48
- 827970
49
- 430625
50
- 739409
51
- 810790
52
- 775511
53
- 811495
54
- 808519
55
- 532378
56
- 608445
57
- 701667
58
- 789421
59
- 163690
60
- 678395
61
- 404721
62
- 746421
63
- 768924
64
- 721958
65
- 817271
66
- 836460
67
- 699316
68
- 510608
69
- 453523
70
- 833655
71
- 781672
72
- 314754
73
- 849303
74
- 810720
75
- 494223
76
- 847456
77
- 839757
78
- 693488
79
- 836162
80
- 805625
81
- 476787
82
- 844524
83
- 747934
84
- 838128
85
- 734830
86
- 807586
87
- 851405
88
- 149912
89
- 842532
90
- 559012
91
- 164357
92
- 815713
93
- 810869
94
- 752975
95
- 733281
96
- 828436
97
- 513557
98
- 797340
99
- 642719
100
- 642869
101
- 669591
102
- 813698
103
- 777254
104
- 398191
105
- 616835
106
- 590393
107
- 555188
108
- 623332
109
- 837023
110
- 777846
111
- 846779
112
- 510346
113
- 408599
114
- 495594
115
- 607317
116
- 773438
117
- 788691
118
- 701334
119
- 415906
120
- 661126
121
- 483253
122
- 411767
123
- 844693
124
- 843344
125
- 773175
126
- 802679
127
- 772799
128
- 727070
129
- 747532
130
- 828448
131
- 727763
132
- 847610
133
- 771985
134
- 773524
135
- 165152
136
- 609872
137
- 776660
138
- 682607
139
- 780445
140
- 573793
141
- 153159
142
- 626408
143
- 411564
144
- 708388
145
- 836703
146
- 745343
147
- 726006
148
- 771271
149
- 848091
150
- 641630
151
- 832240
152
- 735901
153
- 559381
154
- 780435
155
- 846720
156
- 493231
157
- 764017
158
- 730459
159
- 786647
160
- 803793
161
- 408523
162
- 784226
163
- 697162
164
- 836837
165
- 824994
166
- 750670
167
- 448544
168
- 829329
169
- 790279
170
- 722657
171
- 612059
172
- 826567
173
- 757847
174
- 603183
175
- 506591
176
- 846162
177
- 690777
178
- 833627
179
- 844260
180
- 588385
181
- 733786
182
- 717260
183
- 812919
184
- 503894
185
- 630100
186
- 463325
187
- 737462
188
- 668319
189
- 795273
190
- 843246
191
- 849499
192
- 581648
193
- 155212
194
- 840153
195
- 791259
196
- 749093
197
- 751829
198
- 840130
199
- 591118
200
- 437140
201
- 389366
202
- 824226
203
- 683245
204
- 570665
205
- 509855
206
- 775494
207
- 431499
208
- 430336
209
- 362939
210
- 426802
211
- 773757
212
- 804626
213
- 439681
214
- 409011
215
- 152842
216
- 682166
217
- 320112
218
- 582871
219
- 336584
220
- 771283
221
- 402069
222
- 527950
223
- 424730
224
- 638352
225
- 155728
226
- 617756
227
- 346700
228
- 483219
229
- 607205
230
- 732002
231
- 477743
232
- 691258
233
- 434743
234
- 529165
235
- 330043
236
- 622033
237
- 529290
238
- 160698
239
- 498601
240
- 151586
241
- 825306
242
- 522815
243
- 266543
244
- 527590
245
- 772152
246
- 475201
247
- 316516
248
- 474763
249
- 478124
250
- 646712
251
- 504354
252
- 776480
253
- 577798
254
- 219993
255
- 689888
256
- 801362
257
- 489651
258
- 428722
259
- 538821
260
- 837192
261
- 773565
262
- 365543
263
- 518644
264
- 502264
265
- 168502
266
- 173544
267
- 793873
268
- 342607
269
- 849725
270
- 792925
271
- 681845
272
- 348059
273
- 769500
274
- 573430
275
- 774603
276
- 432365
277
- 769633
278
- 435278
279
- 506422
280
- 496848
281
- 637282
282
- 391164
283
- 521431
284
- 504766
285
- 748512
286
- 843534
287
- 417465
288
- 839155
289
- 496403
290
- 775852
291
- 644957
292
- 402315
293
- 404292
294
- 635168
295
- 680425
296
- 547984
297
- 455873
298
- 688802
299
- 651571
300
- 179345
301
- 833093
302
- 224454
303
- 340633
304
- 558447
305
- 638432
306
- 834048
307
- 846206
308
- 635364
309
- 726260
310
- 815259
311
- 826627
312
- 848105
313
- 830959
314
- 685538
315
- 758889
316
- 668269
317
- 739666
318
- 506010
319
- 815330
320
- 846254
321
- 553275
322
- 454499
323
- 265835
324
- 772933
325
- 547424
326
- 772562
327
- 793888
328
- 619788
329
- 506322
330
- 638817
331
- 760751
332
- 609402
333
- 490814
334
- 761786
335
- 845620
336
- 609916
337
- 671055
338
- 827730
339
- 729361
340
- 711958
341
- 573164
342
- 504473
343
- 840408
344
- 604810
345
- 760760
346
- 156674
347
- 528863
348
- 620355
349
- 409083
350
- 796885
351
- 762290
352
- 764323
353
- 814876
354
- 833921
355
- 587492
356
- 764701
357
- 776556
358
- 771445
359
- 673717
360
- 461055
361
- 610853
362
- 840835
363
- 149853
364
- 606064
365
- 628062
366
- 394595
367
- 407917
368
- 802720
369
- 777875
370
- 558869
371
- 753886
372
- 738757
373
- 502669
374
- 528920
375
- 814865
376
- 619115
377
- 687962
378
- 809784
379
- 770461
380
- 600711
381
- 847598
382
- 572181
383
- 681817
384
- 551223
385
- 515764
386
- 724633
387
- 277382
388
- 638779
389
- 847595
390
- 775659
391
- 785785
392
- 428762
393
- 715942
394
- 591901
395
- 366729
396
- 739424
397
- 784349
398
- 535431
399
- 731078
400
- 830298
401
- 542037
402
- 664458
403
- 769695
404
- 704898
405
- 846071
406
- 398730
407
- 360732
408
- 460593
409
- 777080
410
- 492467
411
- 634693
412
- 415064
413
- 360230
414
- 657762
415
- 850521
416
- 572438
417
- 400059
418
- 746719
419
- 426122
420
- 848017
421
- 678376
422
- 841610
423
- 737346
424
- 739712
425
- 839233
426
- 509636
427
- 536485
428
- 760548
429
- 819858
430
- 397254
431
- 492387
432
- 526490
433
- 625800
434
- 637910
435
- 792458
436
- 820801
437
- 287072
438
- 670546
439
- 277307
440
- 776543
441
- 583218
442
- 553139
443
- 849611
444
- 621132
445
- 339605
446
- 825339
447
- 840561
448
- 502668
449
- 594181
450
- 775449
451
- 843680
452
- 534695
453
- 157386
454
- 610424
455
- 817223
456
- 545897
457
- 451469
458
- 572567
459
- 717350
460
- 790818
461
- 761108
462
- 775873
463
- 778351
464
- 754644
465
- 681999
466
- 799473
467
- 729857
468
- 793350
469
- 808406
470
- 507571
471
- 774482
472
- 563356
473
- 728631
474
- 581828
475
- 835184
476
- 680499
477
- 621659
478
- 748517
479
- 347059
480
- 782526
481
- 737999
482
- 701912
483
- 809973
484
- 350152
485
- 453628
486
- 700524
487
- 394419
488
- 809594
489
- 809863
490
- 353735
491
- 808645
492
- 746506
493
- 826657
494
- 579376
495
- 452590
496
- 453846
497
- 733110
498
- 448444
499
- 795397
500
- 163687
501
- 576950
502
- 773859
503
- 308899
504
- 637266
505
- 760925
506
- 497688
507
- 585293
508
- 845012
509
- 659328
510
- 568397
511
- 450808
512
- 670046
513
- 694740
514
- 363928
515
- 615096
516
- 772991
517
- 396929
518
- 397067
519
- 588312
520
- 150378
521
- 474580
522
- 433165
523
- 457270
524
- 394429
525
- 544240
526
- 312241
527
- 337048
528
- 472068
529
- 775102
530
- 663723
531
- 357332
532
- 724027
533
- 694874
534
- 767677
535
- 737172
536
- 518516
537
- 536744
538
- 582622
539
- 244298
540
- 581361
541
- 543312
542
- 387110
543
- 529344
544
- 788314
545
- 543701
546
- 506526
547
- 483897
548
- 310676
549
- 641239
550
- 679283
551
- 762154
552
- 694879
553
- 569673
554
- 168868
555
- 444835
556
- 182975
557
- 509972
558
- 534055
559
- 549621
560
- 835864
561
- 734032
562
- 387802
563
- 341561
564
- 751726
565
- 775924
566
- 587812
567
- 582850
568
- 829059
569
- 779006
570
- 303410
571
- 795790
572
- 327260
573
- 477325
574
- 510625
575
- 452891
576
- 425579
577
- 830117
578
- 843459
579
- 767912
580
- 744531
581
- 534853
582
- 331308
583
- 568474
584
- 849322
585
- 610974
586
- 783468
587
- 166174
588
- 149681
589
- 641651
590
- 398207
591
- 472902
592
- 664543
593
- 700783
594
- 407866
595
- 685933
596
- 850098
597
- 619377
598
- 522953
599
- 742673
600
- 496309
601
- 777520
602
- 721273
603
- 527102
604
- 566265
605
- 807588
606
- 830943
607
- 162449
608
- 821730
609
- 628142
610
- 708433
611
- 776546
612
- 676950
613
- 734593
614
- 444803
615
- 295680
616
- 436760
617
- 173080
618
- 831699
619
- 776055
620
- 819906
621
- 834334
622
- 717288
623
- 149770
624
- 154282
625
- 753709
626
- 815416
627
- 520570
628
- 834940
629
- 350516
630
- 832773
631
- 777094
632
- 847805
633
- 307461
634
- 728520
635
- 740103
636
- 711989
637
- 800647
638
- 520849
639
- 795953
640
- 683950
641
- 733885
642
- 426856
643
- 622050
644
- 849340
645
- 602125
646
- 632679
647
- 794347
648
- 804676
649
- 725603
650
- 818957
651
- 833010
652
- 623557
653
- 769905
654
- 594243
655
- 438299
656
- 833736
657
- 691429
658
- 836559
659
- 691015
660
- 840558
661
- 506540
662
- 617941
663
- 584575
664
- 754369
665
- 534601
666
- 846088
667
- 528696
668
- 761024
669
- 330153
670
- 307555
671
- 396562
672
- 750888
673
- 508567
674
- 151802
675
- 734070
676
- 767156
677
- 836241
678
- 524750
679
- 828197
680
- 792594
681
- 796263
682
- 820525
683
- 817186
684
- 848414
685
- 791055
686
- 594299
687
- 555274
688
- 781784
689
- 317310
690
- 453867
691
- 672955
692
- 727739
693
- 831862
694
- 726247
695
- 832588
696
- 847392
697
- 599048
698
- 829889
699
- 565766
700
- 500667
701
- 745657
702
- 833682
703
- 849229
704
- 756664
705
- 839347
706
- 489625
707
- 728982
708
- 738367
709
- 533542
710
- 502479
711
- 838379
712
- 484562
713
- 776600
714
- 654420
715
- 822835
716
- 819608
717
- 698109
718
- 764174
719
- 770429
720
- 816694
721
- 709270
722
- 642279
723
- 836356
724
- 846010
725
- 642369
726
- 499681
727
- 488377
728
- 631284
729
- 842806
730
- 811504
731
- 844909
732
- 699904
733
- 808604
734
- 845179
735
- 768156
736
- 822402
737
- 839600
738
- 592647
739
- 847946
740
- 621974
741
- 510991
742
- 690307
743
- 652407
744
- 602254
745
- 812810
746
- 825611
747
- 617476
748
- 549193
749
- 520057
750
- 364669
751
- 680674
752
- 822284
753
- 697333
754
- 807871
755
- 708806
756
- 807522
757
- 822326
758
- 546417
759
- 459922
760
- 439475
761
- 833903
762
- 749240
763
- 809839
764
- 566165
765
- 802092
766
- 520330
767
- 459279
768
- 494887
769
- 733006
770
- 739312
771
- 545163
772
- 446670
773
- 619851
774
- 820355
775
- 834851
776
- 777699
777
- 603175
778
- 479862
779
- 845962
780
- 776709
781
- 789739
782
- 457957
783
- 541961
784
- 829285
785
- 751659
786
- 626323
787
- 606199
788
- 691067
789
- 494324
790
- 323457
791
- 603476
792
- 744417
793
- 671088
794
- 576961
795
- 358017
796
- 403844
797
- 326754
798
- 776511
799
- 685880
800
- 693446
801
- 771967
802
- 349273
803
- 696862
804
- 852247
805
- 748948
806
- 346764
807
- 625792
808
- 772579
809
- 366953
810
- 449463
811
- 326311
812
- 438014
813
- 346844
814
- 597546
815
- 806144
816
- 813393
817
- 342050
818
- 773709
819
- 321738
820
- 328501
821
- 595429
822
- 773449
823
- 591029
824
- 460155
825
- 641822
826
- 166803
827
- 763327
828
- 845476
829
- 822023
830
- 781884
831
- 741054
832
- 511204
833
- 699718
834
- 793533
835
- 808019
836
- 491001
837
- 792658
838
- 494888
839
- 776029
840
- 521260
841
- 494204
842
- 844062
843
- 356833
844
- 266105
845
- 606727
846
- 345167
847
- 501030
848
- 412994
849
- 404301
850
- 776269
851
- 403394
852
- 808760
853
- 315194
854
- 658960
855
- 505915
856
- 449270
857
- 633334
858
- 542929
859
- 350232
860
- 425562
861
- 360205
862
- 271932
863
- 397433
864
- 364021
865
- 360365
866
- 510908
867
- 307275
868
- 596945
869
- 325866
870
- 777720
871
- 557407
872
- 695251
873
- 524088
874
- 619432
875
- 715088
876
- 539235
877
- 840134
878
- 818341
879
- 777645
880
- 510826
881
- 526989
882
- 151579
883
- 680528
884
- 156563
885
- 666287
886
- 365127
887
- 487458
888
- 829542
889
- 581611
890
- 151327
891
- 534844
892
- 593451
893
- 698638
894
- 776159
895
- 849287
896
- 396453
897
- 701017
898
- 842796
899
- 767432
900
- 496240
901
- 606439
902
- 775731
903
- 330010
904
- 728101
905
- 651101
906
- 392751
907
- 728781
908
- 813848
909
- 548589
910
- 737238
911
- 612591
912
- 754112
913
- 152761
914
- 843964
915
- 851370
916
- 735063
917
- 821770
918
- 556217
919
- 366536
920
- 676958
921
- 154958
922
- 567444
923
- 723202
924
- 736335
925
- 346375
926
- 840101
927
- 489737
928
- 837522
929
- 728778
930
- 830871
931
- 836203
932
- 840705
933
- 809916
934
- 534108
935
- 848917
936
- 422044
937
- 540889
938
- 415273
939
- 485544
940
- 442195
941
- 563450
942
- 771964
943
- 798123
944
- 607302
945
- 363529
946
- 771000
947
- 777862
948
- 797376
949
- 161678
950
- 626817
951
- 793449
952
- 814504
953
- 699891
954
- 731931
955
- 686492
956
- 696739
957
- 843905
958
- 396113
959
- 810423
960
- 717652
961
- 773335
962
- 811026
963
- 367447
964
- 796125
965
- 536785
966
- 694050
967
- 520031
968
- 821315
969
- 710498
970
- 720216
971
- 558567
972
- 836458
973
- 475394
974
- 489700
975
- 763598
976
- 710254
977
- 723656
978
- 408460
979
- 771012
980
- 647613
981
- 758453
982
- 569227
983
- 764580
984
- 744268
985
- 498461
986
- 771363
987
- 739290
988
- 850615
989
- 763206
990
- 411073
991
- 750224
992
- 737147
993
- 402696
994
- 845312
995
- 304639
996
- 844427
997
- 639105
998
- 827832
999
- 731574
1000
- 803975
1001
- 662014
1002
- 574987
1003
- 823860
1004
- 352961
1005
- 794503
1006
- 839994
1007
- 792877
1008
- 806297
1009
- 550251
1010
- 837708
1011
- 790542
1012
- 789096
1013
- 819267
1014
- 697430
1015
- 522691
1016
- 847312
1017
- 422009
1018
- 800656
1019
- 461067
1020
- 780691
1021
- 805126
1022
- 782840
1023
- 336618
1024
- 358903
1025
- 805888
1026
- 847694
1027
- 722474
1028
- 712014
1029
- 513154
1030
- 637999
1031
- 830316
1032
- 701961
1033
- 821374
1034
- 775922
1035
- 827994
1036
- 835198
1037
- 545611
1038
- 825894
1039
- 555626
1040
- 621989
1041
- 778372
1042
- 818225
1043
- 778251
1044
- 154313
1045
- 810427
1046
- 268298
1047
- 839425
1048
- 815456
1049
- 430109
1050
- 497987
1051
- 544180
1052
- 843623
1053
- 719716
1054
- 362472
1055
- 606479
1056
- 835281
1057
- 346314
1058
- 610282
1059
- 841099
1060
- 740082
1061
- 689022
1062
- 460398
1063
- 839446
1064
- 696731
1065
- 830905
1066
- 659046
1067
- 490092
1068
- 846372
1069
- 765558
1070
- 423855
1071
- 840977
1072
- 499785
1073
- 713359
1074
- 816990
1075
- 159141
1076
- 639232
1077
- 736766
1078
- 778314
1079
- 737275
1080
- 809757
1081
- 770279
1082
- 158961
1083
- 840799
1084
- 712005
1085
- 690625
1086
- 697847
1087
- 730307
1088
- 835502
1089
- 551116
1090
- 840491
1091
- 748326
1092
- 563983
1093
- 843631
1094
- 602224
1095
- 416699
1096
- 337501
1097
- 349907
1098
- 445611
1099
- 563571
1100
- 171769
1101
- 583464
1102
- 844083
1103
- 602123
1104
- 483231
1105
- 778269
1106
- 416031
1107
- 524574
1108
- 440594
1109
- 532863
1110
- 330874
1111
- 553756
1112
- 621807
1113
- 642600
1114
- 330218
1115
- 331494
1116
- 812449
1117
- 408664
1118
- 354720
1119
- 603354
1120
- 453801
1121
- 772422
1122
- 474464
1123
- 270702
1124
- 751822
1125
- 462621
1126
- 456320
1127
- 170343
1128
- 393699
1129
- 634607
1130
- 822841
1131
- 832233
1132
- 595526
1133
- 358241
1134
- 708138
1135
- 354438
1136
- 671007
1137
- 537994
1138
- 352344
1139
- 502262
1140
- 536674
1141
- 778036
1142
- 296369
1143
- 692366
1144
- 711647
1145
- 557263
1146
- 638830
1147
- 606347
1148
- 422092
1149
- 532633
1150
- 575155
1151
- 425440
1152
- 294441
1153
- 422825
1154
- 516509
1155
- 581198
1156
- 776259
1157
- 779585
1158
- 617604
1159
- 824109
1160
- 678011
1161
- 561583
1162
- 437991
1163
- 667578
1164
- 774370
1165
- 699248
1166
- 792204
1167
- 152661
1168
- 565665
1169
- 517642
1170
- 670020
1171
- 472852
1172
- 618636
1173
- 479414
1174
- 566493
1175
- 512002
1176
- 329292
1177
- 388820
1178
- 671724
1179
- 617726
1180
- 474736
1181
- 848035
1182
- 773861
1183
- 848198
1184
- 262389
1185
- 515870
1186
- 666521
1187
- 435059
1188
- 160141
1189
- 357156
1190
- 354153
1191
- 577568
1192
- 754455
1193
- 538399
1194
- 773162
1195
- 774964
1196
- 829167
1197
- 352486
1198
- 729390
1199
- 416243
1200
- 210256
1201
- 678118
1202
- 483211
1203
- 463489
1204
- 325276
1205
- 307002
1206
- 774534
1207
- 764333
1208
- 774735
1209
- 831473
1210
- 593532
1211
- 578456
1212
- 457953
1213
- 707019
1214
- 839802
1215
- 630050
1216
- 800415
1217
- 829507
1218
- 680790
1219
- 838764
1220
- 839518
1221
- 720132
1222
- 829687
1223
- 652538
1224
- 457196
1225
- 326422
1226
- 850059
1227
- 775165
1228
- 609235
1229
- 686553
1230
- 616530
1231
- 603411
1232
- 798750
1233
- 727465
1234
- 412806
1235
- 791309
1236
- 795437
1237
- 351855
1238
- 460218
1239
- 758040
1240
- 317838
1241
- 718434
1242
- 608517
1243
- 620030
1244
- 777598
1245
- 706089
1246
- 523436
1247
- 447068
1248
- 740285
1249
- 572280
1250
- 772627
1251
- 753956
1252
- 528122
1253
- 588822
1254
- 599880
1255
- 699150
1256
- 433901
1257
- 567134
1258
- 838503
1259
- 751496
1260
- 544867
1261
- 756979
1262
- 343980
1263
- 709145
1264
- 491805
1265
- 831222
1266
- 194758
1267
- 765088
1268
- 516829
1269
- 165231
1270
- 499768
1271
- 735669
1272
- 850411
1273
- 405549
1274
- 804406
1275
- 752903
1276
- 841381
1277
- 794698
1278
- 833250
1279
- 579185
1280
- 524593
1281
- 825366
1282
- 458244
1283
- 815934
1284
- 772913
1285
- 683495
1286
- 584807
1287
- 757426
1288
- 836155
1289
- 835115
1290
- 808149
1291
- 777807
1292
- 695826
1293
- 808334
1294
- 808679
1295
- 828769
1296
- 778433
1297
- 450449
1298
- 836067
1299
- 827948
1300
- 753377
1301
- 799557
1302
- 720331
1303
- 570707
1304
- 798407
1305
- 750244
1306
- 565263
1307
- 777485
1308
- 795890
1309
- 535202
1310
- 837373
1311
- 452341
1312
- 713659
1313
- 746514
1314
- 729526
1315
- 721325
1316
- 689053
1317
- 720771
1318
- 197126
1319
- 735816
1320
- 534917
1321
- 455170
1322
- 833637
1323
- 489544
1324
- 795877
1325
- 849816
1326
- 485751
1327
- 602190
1328
- 733130
1329
- 788490
1330
- 846042
1331
- 846255
1332
- 712508
1333
- 770568
1334
- 824825
1335
- 460591
1336
- 506006
1337
- 751825
1338
- 843299
1339
- 540958
1340
- 849591
1341
- 745552
1342
- 771295
1343
- 728321
1344
- 705989
1345
- 155950
1346
- 443314
1347
- 756951
1348
- 762908
1349
- 849898
1350
- 835359
1351
- 729308
1352
- 304071
1353
- 696399
1354
- 834602
1355
- 569686
1356
- 420919
1357
- 438723
1358
- 391850
1359
- 774233
1360
- 834762
1361
- 729452
1362
- 774969
1363
- 679020
1364
- 715416
1365
- 821849
1366
- 419825
1367
- 827978
1368
- 816552
1369
- 759529
1370
- 797211
1371
- 463580
1372
- 615596
1373
- 801606
1374
- 785093
1375
- 841406
1376
- 661159
1377
- 561392
1378
- 847685
1379
- 822594
1380
- 557748
1381
- 761928
1382
- 841793
1383
- 643369
1384
- 804247
1385
- 782776
1386
- 583802
1387
- 712950
1388
- 575310
1389
- 796615
1390
- 722349
1391
- 781751
1392
- 436524
1393
- 510269
1394
- 437761
1395
- 777042
1396
- 506448
1397
- 258998
1398
- 733687
1399
- 834927
1400
- 457292
1401
- 570132
1402
- 311220
1403
- 551959
1404
- 569995
1405
- 828964
1406
- 413817
1407
- 365300
1408
- 640342
1409
- 408746
1410
- 574908
1411
- 703756
1412
- 841092
1413
- 428366
1414
- 362743
1415
- 779165
1416
- 814151
1417
- 502492
1418
- 680634
1419
- 594887
1420
- 409555
1421
- 733254
1422
- 610485
1423
- 764870
1424
- 534955
1425
- 762394
1426
- 460589
1427
- 687841
1428
- 418079
1429
- 839673
1430
- 434494
1431
- 679834
1432
- 272533
1433
- 434588
1434
- 804302
1435
- 449622
1436
- 775812
1437
- 300120
1438
- 395952
1439
- 155031
1440
- 471907
1441
- 572932
1442
- 359665
1443
- 550289
1444
- 550403
1445
- 431921
1446
- 760602
1447
- 430841
1448
- 366889
1449
- 157681
1450
- 848962
1451
- 826441
1452
- 563822
1453
- 843214
1454
- 553960
1455
- 624883
1456
- 565638
1457
- 577780
1458
- 431172
1459
- 361908
1460
- 773685
1461
- 593330
1462
- 541065
1463
- 736795
1464
- 577891
1465
- 160559
1466
- 293346
1467
- 822731
1468
- 269580
1469
- 499837
1470
- 705653
1471
- 390930
1472
- 157210
1473
- 430128
1474
- 827738
1475
- 542944
1476
- 448815
1477
- 540805
1478
- 619866
1479
- 358346
1480
- 454917
1481
- 164268
1482
- 839565
1483
- 772151
1484
- 405436
1485
- 776012
1486
- 337449
1487
- 514344
1488
- 502224
1489
- 567378
1490
- 596266
1491
- 166418
1492
- 478773
1493
- 805097
1494
- 350812
1495
- 745551
1496
- 457693
1497
- 825654
1498
- 499938
1499
- 438923
1500
- 521837
1501
- 363145
1502
- 773596
1503
- 430777
1504
- 761625
1505
- 698752
1506
- 723477
1507
- 168443
1508
- 495904
1509
- 448159
1510
- 156281
1511
- 393016
1512
- 434645
1513
- 839436
1514
- 327029
1515
- 812542
1516
- 303393
1517
- 845571
1518
- 433623
1519
- 834055
1520
- 775164
1521
- 568650
1522
- 449642
1523
- 747488
1524
- 332951
1525
- 487824
1526
- 686845
1527
- 166894
1528
- 770763
1529
- 851009
1530
- 615852
1531
- 845541
1532
- 817628
1533
- 761792
1534
- 723505
1535
- 767853
1536
- 694782
1537
- 837217
1538
- 842573
1539
- 605551
1540
- 339431
1541
- 771309
1542
- 602919
1543
- 434193
1544
- 759161
1545
- 544248
1546
- 496581
1547
- 648933
1548
- 828083
1549
- 412870
1550
- 840671
1551
- 830250
1552
- 685200
1553
- 830469
1554
- 845839
1555
- 453697
1556
- 827482
1557
- 724288
1558
- 399710
1559
- 825553
1560
- 351893
1561
- 367398
1562
- 844401
1563
- 591452
1564
- 403387
1565
- 344199
1566
- 773663
1567
- 784620
1568
- 689351
1569
- 602059
1570
- 685654
1571
- 841930
1572
- 767634
1573
- 765370
1574
- 569897
1575
- 334395
1576
- 585247
1577
- 670045
1578
- 363151
1579
- 635351
1580
- 547507
1581
- 504921
1582
- 672165
1583
- 523987
1584
- 807279
1585
- 816851
1586
- 758521
1587
- 586662
1588
- 406132
1589
- 795745
1590
- 755621
1591
- 847213
1592
- 486315
1593
- 831696
1594
- 273995
1595
- 843422
1596
- 828454
1597
- 621291
1598
- 642480
1599
- 823558
1600
- 597420
1601
- 725965
1602
- 834417
1603
- 733173
1604
- 687533
1605
- 434024
1606
- 779510
1607
- 155902
1608
- 706661
1609
- 771245
1610
- 613716
1611
- 510306
1612
- 753585
1613
- 472940
1614
- 754376
1615
- 671807
1616
- 775768
1617
- 311394
1618
- 638502
1619
- 675477
1620
- 474162
1621
- 611352
1622
- 773172
1623
- 761951
1624
- 674808
1625
- 308752
1626
- 547174
1627
- 327358
1628
- 558322
1629
- 847738
1630
- 712433
1631
- 531413
1632
- 846014
1633
- 616993
1634
- 709678
1635
- 791903
1636
- 503901
1637
- 734598
1638
- 493008
1639
- 735644
1640
- 737120
1641
- 392563
1642
- 730235
1643
- 626019
1644
- 828974
1645
- 508935
1646
- 798521
1647
- 847092
1648
- 844869
1649
- 601205
1650
- 835032
1651
- 830129
1652
- 657434
1653
- 720864
1654
- 760642
1655
- 606614
1656
- 768254
1657
- 479280
1658
- 785339
1659
- 834105
1660
- 819206
1661
- 772552
1662
- 538198
1663
- 738311
1664
- 443019
1665
- 821509
1666
- 613153
1667
- 774364
1668
- 579087
1669
- 553589
1670
- 729227
1671
- 836426
1672
- 840382
1673
- 404686
1674
- 268652
1675
- 665534
1676
- 732888
1677
- 330883
1678
- 625976
1679
- 841175
1680
- 338514
1681
- 829697
1682
- 511748
1683
- 551622
1684
- 750997
1685
- 849183
1686
- 570855
1687
- 766345
1688
- 154064
1689
- 772310
1690
- 849936
1691
- 615498
1692
- 692115
1693
- 517561
1694
- 417479
1695
- 831200
1696
- 834171
1697
- 746940
1698
- 434913
1699
- 753067
1700
- 299937
1701
- 150674
1702
- 738375
1703
- 336019
1704
- 166687
1705
- 345727
1706
- 360946
1707
- 530905
1708
- 742921
1709
- 436877
1710
- 690061
1711
- 402743
1712
- 339479
1713
- 828216
1714
- 837992
1715
- 154208
1716
- 586160
1717
- 829174
1718
- 570194
1719
- 704866
1720
- 694971
1721
- 338142
1722
- 429780
1723
- 151422
1724
- 473416
1725
- 834159
1726
- 350911
1727
- 732558
1728
- 795909
1729
- 581380
1730
- 523262
1731
- 495509
1732
- 630550
1733
- 397410
1734
- 487279
1735
- 534654
1736
- 532824
1737
- 351531
1738
- 486714
1739
- 461608
1740
- 841522
1741
- 683190
1742
- 618523
1743
- 752213
1744
- 453634
1745
- 415146
1746
- 227791
1747
- 603654
1748
- 777531
1749
- 421667
1750
- 614655
1751
- 612611
1752
- 586305
1753
- 682812
1754
- 173469
1755
- 563880
1756
- 397527
1757
- 414444
1758
- 605840
1759
- 735451
1760
- 502730
1761
- 424688
1762
- 440572
1763
- 460504
1764
- 776378
1765
- 606145
1766
- 678077
1767
- 408619
1768
- 731714
1769
- 652419
1770
- 595248
1771
- 775727
1772
- 340744
1773
- 460354
1774
- 720490
1775
- 607602
1776
- 775761
1777
- 543968
1778
- 767639
1779
- 803730
1780
- 631663
1781
- 149785
1782
- 583545
1783
- 611544
1784
- 489988
1785
- 152922
1786
- 733228
1787
- 591431
1788
- 642203
1789
- 728875
1790
- 543446
1791
- 846297
1792
- 527581
1793
- 744708
1794
- 775564
1795
- 153730
1796
- 166391
1797
- 436116
1798
- 565800
1799
- 152027
1800
- 664178
1801
- 731548
1802
- 760546
1803
- 707788
1804
- 350423
1805
- 626941
1806
- 522563
1807
- 697439
1808
- 779912
1809
- 239933
1810
- 555016
1811
- 496846
1812
- 598598
1813
- 273513
1814
- 659404
1815
- 415055
1816
- 832241
1817
- 721827
1818
- 552338
1819
- 487612
1820
- 386853
1821
- 642602
1822
- 708097
1823
- 361685
1824
- 512935
1825
- 798360
1826
- 823250
1827
- 662149
1828
- 159051
1829
- 826653
1830
- 435323
1831
- 759520
1832
- 813021
1833
- 562437
1834
- 743755
1835
- 752606
1836
- 829857
1837
- 726499
1838
- 846473
1839
- 749831
1840
- 733169
1841
- 636818
1842
- 527840
1843
- 799831
1844
- 748320
1845
- 847682
1846
- 545258
1847
- 390245
1848
- 681315
1849
- 237311
1850
- 214008
1851
- 699302
1852
- 550407
1853
- 608228
1854
- 723193
1855
- 836853
1856
- 591341
1857
- 831587
1858
- 836635
1859
- 435025
1860
- 602017
1861
- 794449
1862
- 776672
1863
- 830742
1864
- 301016
1865
- 424760
1866
- 453648
1867
- 592894
1868
- 849022
1869
- 580504
1870
- 693808
1871
- 765963
1872
- 838409
1873
- 738066
1874
- 524286
1875
- 678550
1876
- 157353
1877
- 768755
1878
- 813988
1879
- 662289
1880
- 428231
1881
- 847714
1882
- 806980
1883
- 797709
1884
- 805026
1885
- 810285
1886
- 442671
1887
- 843939
1888
- 438785
1889
- 827891
1890
- 745141
1891
- 783589
1892
- 354135
1893
- 505785
1894
- 842322
1895
- 840193
1896
- 802536
1897
- 225055
1898
- 850374
1899
- 771838
1900
- 849190
1901
- 831838
1902
- 483096
1903
- 589663
1904
- 836727
1905
- 612081
1906
- 488848
1907
- 677432
1908
- 723228
1909
- 826250
1910
- 781770
1911
- 740321
1912
- 806097
1913
- 438250
1914
- 852480
1915
- 539817
1916
- 826028
1917
- 443305
1918
- 792437
1919
- 823053
1920
- 732914
1921
- 833901
1922
- 639505
1923
- 758194
1924
- 773938
1925
- 846057
1926
- 569667
1927
- 814206
1928
- 758294
1929
- 841927
1930
- 800730
1931
- 760790
1932
- 416653
1933
- 848484
1934
- 430472
1935
- 713877
1936
- 519244
1937
- 661733
1938
- 846455
1939
- 501384
1940
- 686894
1941
- 804882
1942
- 847719
1943
- 842887
1944
- 825646
1945
- 681762
1946
- 774345
1947
- 822383
1948
- 833276
1949
- 800907
1950
- 581711
1951
- 346599
1952
- 753006
1953
- 761578
1954
- 438563
1955
- 305361
1956
- 715812
1957
- 767551
1958
- 846229
1959
- 782383
1960
- 624355
1961
- 845961
1962
- 847356
1963
- 831597
1964
- 768090
1965
- 844093
1966
- 454967
1967
- 706662
1968
- 436816
1969
- 565757
1970
- 739106
1971
- 834225
1972
- 678976
1973
- 735138
1974
- 767954
1975
- 605403
1976
- 439883
1977
- 487610
1978
- 344023
1979
- 201579
1980
- 428351
1981
- 844677
1982
- 823777
1983
- 558391
1984
- 429248
1985
- 829767
1986
- 345217
1987
- 745077
1988
- 681571
1989
- 677235
1990
- 761617
1991
- 802193
1992
- 787066
1993
- 828357
1994
- 487174
1995
- 832681
1996
- 839785
1997
- 320283
1998
- 723502
1999
- 497967
2000
- 421222
2001
- 824102
2002
- 819148
2003
- 582853
2004
- 518576
2005
- 829649
2006
- 492100
2007
- 500499
2008
- 489654
2009
- 834335
2010
- 495488
2011
- 173613
2012
- 457296
2013
- 495411
2014
- 367340
2015
- 754308
2016
- 649386
2017
- 838208
2018
- 435193
2019
- 751668
2020
- 508425
2021
- 784876
2022
- 670452
2023
- 772720
2024
- 529578
2025
- 840644
2026
- 850311
2027
- 675479
2028
- 774553
2029
- 451963
2030
- 460507
2031
- 822336
2032
- 337172
2033
- 588733
2034
- 479100
2035
- 584743
2036
- 358101
2037
- 697219
2038
- 767832
2039
- 387287
2040
- 543271
2041
- 769136
2042
- 436940
2043
- 771675
2044
- 197395
2045
- 502407
2046
- 638623
2047
- 551929
2048
- 511588
2049
- 355258
2050
- 622014
2051
- 567538
2052
- 721641
2053
- 786209
2054
- 462361
2055
- 611162
2056
- 200077
2057
- 154641
2058
- 451339
2059
- 422177
2060
- 839787
2061
- 363004
2062
- 733013
2063
- 303530
2064
- 738813
2065
- 350223
2066
- 820285
2067
- 774426
2068
- 411332
2069
- 165090
2070
- 548821
2071
- 350934
2072
- 521641
2073
- 337528
2074
- 534185
2075
- 508419
2076
- 771475
2077
- 340925
2078
- 516097
2079
- 505395
2080
- 181439
2081
- 659416
2082
- 849275
2083
- 833587
2084
- 675628
2085
- 594853
2086
- 399393
2087
- 173337
2088
- 694966
2089
- 251815
2090
- 152806
2091
- 411509
2092
- 842175
2093
- 812900
2094
- 427090
2095
- 400666
2096
- 606965
2097
- 514131
2098
- 758274
2099
- 841169
2100
- 838958
2101
- 775742
2102
- 403734
2103
- 670441
2104
- 429405
2105
- 388103
2106
- 687017
2107
- 452558
2108
- 616402
2109
- 771734
2110
- 347784
2111
- 472860
2112
- 803139
2113
- 155260
2114
- 776662
2115
- 501247
2116
- 837358
2117
- 346174
2118
- 398106
2119
- 361326
2120
- 591345
2121
- 695256
2122
- 367763
2123
- 285533
2124
- 696714
2125
- 338957
2126
- 387326
2127
- 403138
2128
- 402901
2129
- 418077
2130
- 590550
2131
- 162812
2132
- 740260
2133
- 793770
2134
- 588926
2135
- 438320
2136
- 840501
2137
- 671540
2138
- 503800
2139
- 608408
2140
- 675753
2141
- 366430
2142
- 843878
2143
- 720951
2144
- 834504
2145
- 611997
2146
- 336527
2147
- 755145
2148
- 687090
2149
- 820547
2150
- 715811
2151
- 152025
2152
- 690732
2153
- 840934
2154
- 777617
2155
- 767814
2156
- 571810
2157
- 761256
2158
- 464485
2159
- 718041
2160
- 675190
2161
- 843221
2162
- 488540
2163
- 748647
2164
- 737565
2165
- 528774
2166
- 612808
2167
- 491633
2168
- 735886
2169
- 450070
2170
- 166814
2171
- 406151
2172
- 428664
2173
- 810488
2174
- 539078
2175
- 716250
2176
- 816500
2177
- 572096
2178
- 821456
2179
- 827844
2180
- 358615
2181
- 824552
2182
- 733722
2183
- 836242
2184
- 847033
2185
- 720689
2186
- 318712
2187
- 510920
2188
- 241211
2189
- 387189
2190
- 839879
2191
- 612488
2192
- 754809
2193
- 608229
2194
- 771401
2195
- 830060
2196
- 452575
2197
- 676540
2198
- 457070
2199
- 771930
2200
- 343593
2201
- 154377
2202
- 426837
2203
- 729581
2204
- 825466
2205
- 828371
2206
- 678711
2207
- 510434
2208
- 821268
2209
- 769515
2210
- 827434
2211
- 550520
2212
- 735472
2213
- 495290
2214
- 642142
2215
- 474674
2216
- 505558
2217
- 749131
2218
- 421943
2219
- 615298
2220
- 611813
2221
- 490879
2222
- 506761
2223
- 832580
2224
- 713731
2225
- 663750
2226
- 393174
2227
- 723281
2228
- 415140
2229
- 689986
2230
- 410381
2231
- 823928
2232
- 346396
2233
- 843516
2234
- 617143
2235
- 475199
2236
- 731401
2237
- 720876
2238
- 808551
2239
- 802676
2240
- 843016
2241
- 778134
2242
- 163619
2243
- 333509
2244
- 725662
2245
- 596983
2246
- 211229
2247
- 759823
2248
- 183110
2249
- 491302
2250
- 569608
2251
- 406919
2252
- 363308
2253
- 515922
2254
- 647094
2255
- 512435
2256
- 620953
2257
- 756542
2258
- 718583
2259
- 693893
2260
- 168228
2261
- 849118
2262
- 709275
2263
- 622557
2264
- 773334
2265
- 830022
2266
- 502103
2267
- 575613
2268
- 750161
2269
- 157152
2270
- 779488
2271
- 501046
2272
- 772859
2273
- 361728
2274
- 659697
2275
- 809846
2276
- 786289
2277
- 322751
2278
- 851082
2279
- 840770
2280
- 599699
2281
- 568127
2282
- 829141
2283
- 711809
2284
- 704719
2285
- 743663
2286
- 839993
2287
- 636226
2288
- 769501
2289
- 630823
2290
- 819378
2291
- 364136
2292
- 729248
2293
- 727992
2294
- 609614
2295
- 667746
2296
- 826492
2297
- 420645
2298
- 631742
2299
- 772917
2300
- 751921
2301
- 585898
2302
- 462309
2303
- 785612
2304
- 713323
2305
- 777637
2306
- 363789
2307
- 846456
2308
- 594372
2309
- 347118
2310
- 806693
2311
- 835220
2312
- 826332
2313
- 841134
2314
- 548907
2315
- 827088
2316
- 774271
2317
- 325718
2318
- 346950
2319
- 846158
2320
- 474360
2321
- 621056
2322
- 484010
2323
- 799329
2324
- 444224
2325
- 355946
2326
- 484929
2327
- 510299
2328
- 271870
2329
- 398816
2330
- 510122
2331
- 511999
2332
- 576646
2333
- 152332
2334
- 408937
2335
- 521758
2336
- 775032
2337
- 722477
2338
- 165310
2339
- 580513
2340
- 519939
2341
- 318944
2342
- 693062
2343
- 731291
2344
- 848780
2345
- 775955
2346
- 472237
2347
- 820198
2348
- 835683
2349
- 547760
2350
- 648097
2351
- 343936
2352
- 777941
2353
- 522897
2354
- 518144
2355
- 394353
2356
- 568211
2357
- 769152
2358
- 153759
2359
- 773007
2360
- 588551
2361
- 737786
2362
- 411322
2363
- 451734
2364
- 774654
2365
- 618016
2366
- 543893
2367
- 414790
2368
- 577017
2369
- 706018
2370
- 520311
2371
- 822740
2372
- 461517
2373
- 347050
2374
- 205106
2375
- 593906
2376
- 763988
2377
- 510621
2378
- 294981
2379
- 643795
2380
- 603892
2381
- 845705
2382
- 630891
2383
- 743236
2384
- 775712
2385
- 428381
2386
- 417382
2387
- 343237
2388
- 637494
2389
- 350535
2390
- 701582
2391
- 757525
2392
- 773072
2393
- 479456
2394
- 602171
2395
- 623149
2396
- 706580
2397
- 834629
2398
- 405881
2399
- 159060
2400
- 606365
2401
- 834514
2402
- 842375
2403
- 510559
2404
- 849945
2405
- 849094
2406
- 754685
2407
- 490135
2408
- 388292
2409
- 622761
2410
- 769160
2411
- 834816
2412
- 632928
2413
- 782359
2414
- 334975
2415
- 754544
2416
- 690976
2417
- 433374
2418
- 678519
2419
- 730623
2420
- 514396
2421
- 541303
2422
- 605460
2423
- 687273
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Data/not_active_drumeo_camp.csv DELETED
The diff for this file is too large to render. See raw diff
 
Dockerfile DELETED
@@ -1,37 +0,0 @@
1
- # ---------------------------------------------------------------------
2
- # Base image – use the full tag so `wget` is available for the steps
3
- FROM python:3.9
4
-
5
- # ---------------------------------------------------------------------
6
- # 1. Create UID-1000 account *and its home directory*.
7
- RUN useradd -m -u 1000 user
8
-
9
- # Environment: declare the home dir now (some HF-injected commands
10
- # look at $HOME) but stay root for the next layers.
11
- ENV HOME=/home/user \
12
- PYTHONUNBUFFERED=1 \
13
- PIP_NO_CACHE_DIR=1 \
14
- PATH="$HOME/.local/bin:$PATH"
15
-
16
- # ---------------------------------------------------------------------
17
- # 2. Install Python dependencies **as root** so the console scripts
18
- # land in /usr/local/bin (already on PATH at runtime).
19
- WORKDIR /app
20
- COPY requirements.txt /tmp/reqs.txt
21
- RUN pip install --no-cache-dir -r /tmp/reqs.txt \
22
- && rm /tmp/reqs.txt
23
-
24
- # ---------------------------------------------------------------------
25
- # 3. Switch to the non-root user for the final image,
26
- # then copy the source tree.
27
- USER user
28
- WORKDIR $HOME/app
29
- COPY --chown=user . .
30
-
31
- # ---------------------------------------------------------------------
32
- # 4. Launch: $PORT is set by the platform at runtime; fall back to 8501
33
- # for local docker runs.
34
- CMD streamlit run app.py \
35
- --server.port=${PORT:-8501} \
36
- --server.headless true \
37
- --server.address 0.0.0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Messaging_system/Message_generator_2.py DELETED
@@ -1,253 +0,0 @@
1
- """
2
- THis class will generate message or messages based on the number of requested.
3
- """
4
- import json
5
- import time
6
- from openai import OpenAI
7
- from tqdm import tqdm
8
- import streamlit as st
9
-
10
- from Messaging_system.MultiMessage_2 import MultiMessage
11
- from Messaging_system.protection_layer import ProtectionLayer
12
- import openai
13
- from Messaging_system.LLM import LLM
14
-
15
-
16
- class MessageGenerator:
17
-
18
- def __init__(self, CoreConfig):
19
- self.Core = CoreConfig
20
- self.llm = LLM(CoreConfig)
21
-
22
- # --------------------------------------------------------------
23
- # --------------------------------------------------------------
24
- def generate_messages(self, progress_callback):
25
- """
26
- generating messages based on prompts for each user
27
- :return: updating message column for each user
28
- """
29
-
30
- total_users = len(self.Core.users_df)
31
- st.write("Generating messages ... ")
32
-
33
- self.Core.start_time = time.time()
34
- for progress, (idx, row) in enumerate(tqdm(self.Core.users_df.iterrows(), desc="generating messages")):
35
- # if we have a prompt to generate a personalized message
36
- # Update progress if callback is provided
37
- if progress_callback is not None:
38
- progress_callback(progress, total_users)
39
-
40
- if row["prompt"] is not None:
41
- first_message = self.llm.get_response(prompt=row["prompt"], instructions=self.llm_instructions())
42
-
43
- if first_message is not None:
44
- # adding protection layer
45
- # protect = ProtectionLayer(CoreConfig=self.Core)
46
- # message, total_tokens = protect.criticize(message=first_message, user=row)
47
- message = first_message
48
-
49
-
50
- # updating tokens
51
- # self.Core.total_tokens['prompt_tokens'] += total_tokens['prompt_tokens']
52
- # self.Core.total_tokens['completion_tokens'] += total_tokens['completion_tokens']
53
- # self.Core.temp_token_counter += total_tokens['prompt_tokens'] + total_tokens['completion_tokens']
54
-
55
- # double check output structure
56
- if isinstance(message, dict) and "message" in message and isinstance(message["message"], str):
57
- # parsing output result
58
- message = self.parsing_output_message(message, row)
59
- self.Core.users_df.at[idx, "message"] = message
60
- row["message"] = message
61
- else:
62
- self.Core.users_df.at[idx, "message"] = None
63
- self.Core.checkpoint()
64
- self.Core.respect_request_ratio()
65
- else:
66
- self.Core.users_df.at[idx, "message"] = None
67
-
68
- # generating subsequence messages if needed:
69
- if isinstance(self.Core.subsequence_messages, dict) and len(self.Core.subsequence_messages.keys()) > 1 and \
70
- self.Core.users_df.at[idx, "message"] is not None and row["message"] is not None:
71
- MM = MultiMessage(self.Core)
72
- message = MM.generate_multi_messages(row)
73
- self.Core.users_df.at[idx, "message"] = message
74
-
75
- else:
76
- # ---------------------------------------------------------
77
- # SINGLE-MESSAGE path
78
- # ---------------------------------------------------------
79
- single_msg = row["message"] or self.Core.users_df.at[idx, "message"]
80
- if single_msg is not None:
81
- # If the single message is still a JSON string, turn it into a dict first
82
- if isinstance(single_msg, str):
83
- try:
84
- single_msg = json.loads(single_msg)
85
- except json.JSONDecodeError:
86
- # leave it as-is if it’s not valid JSON
87
- pass
88
-
89
- msg_wrapper = {"messages_sequence": [single_msg]}
90
- # Again, store a proper JSON string
91
- self.Core.users_df.at[idx, "message"] = json.dumps(msg_wrapper,
92
- ensure_ascii=False)
93
-
94
- else:
95
- self.Core.users_df.at[idx, "message"] = None
96
-
97
- return self.Core
98
-
99
- # --------------------------------------------------------------
100
- # --------------------------------------------------------------
101
- def parsing_output_message(self, message, user):
102
- """
103
- Parses the output JSON from the LLM and enriches it with additional content information if needed.
104
-
105
- :param message: Output JSON from LLM (expected to have at least a "message" key)
106
- :param user: The user row
107
- :return: Parsed and enriched output as a JSON object
108
- """
109
- if self.Core.messaging_mode == "message":
110
- # Only "message" is expected when messaging mode is message and we are not recommending any other content from input
111
- if "message" not in message or "header" not in message:
112
- print("LLM output is missing 'message'.")
113
- return None
114
-
115
- output_message = {"header": message["header"], "message": message["message"], "web_url_path": user["recsys_result"]}
116
-
117
- if self.Core.messaging_mode == "recsys_result":
118
- output_message = self.fetch_recommendation_data(user, message)
119
- elif self.Core.messaging_mode == "recommend_playlist":
120
- # adding playlist url to the message
121
- if "playlist_id" in message and "message" in message:
122
- playlist_id = str(message["playlist_id"])
123
- web_url_path = f"https://www.musora.com/{self.Core.brand}/playlist/{playlist_id}"
124
- # Add these to the message dict
125
- output_message = {
126
- "header": message["header"],
127
- "message": message["message"],
128
- "playlist_id": int(message["playlist_id"]),
129
- "web_url_path": web_url_path,
130
- }
131
-
132
- return json.dumps(output_message, ensure_ascii=False)
133
-
134
- # --------------------------------------------------------------
135
- # --------------------------------------------------------------
136
- def fetch_recommendation_data(self, user, message):
137
-
138
- user_id = user["user_id"]
139
- content_id = int(user["recommendation"])
140
- recsys_json_str = user["recsys_result"]
141
- recsys_data = json.loads(recsys_json_str)
142
-
143
- # Initialize variables to store found item and category
144
- found_item = None
145
-
146
- # Search through all categories in the recsys data
147
- for category, items in recsys_data.items():
148
- for item in items:
149
- if item.get("content_id") == content_id:
150
- found_item = item
151
- break # Exit inner loop if item is found
152
- if found_item:
153
- break # Exit outer loop if item is found
154
-
155
- if not found_item:
156
- print(f"content_id {content_id} not found in recsys_data for user_id {user_id}.")
157
- return None
158
-
159
- # Extract required fields from found_item
160
- web_url_path = found_item.get("web_url_path")
161
- title = found_item.get("title")
162
- thumbnail_url = found_item.get("thumbnail_url")
163
-
164
- message["message"].replace('\\', '').replace('"', '')
165
-
166
- # Add these to the message dict
167
- output_message = {
168
- "header": message.get("header"),
169
- "message": message.get("message"),
170
- "content_id": content_id,
171
- "web_url_path": web_url_path,
172
- "title": title,
173
- "thumbnail_url": thumbnail_url
174
- }
175
- return output_message
176
-
177
- # --------------------------------------------------------------
178
- # --------------------------------------------------------------
179
- def llm_instructions(self):
180
- """
181
- Setting instructions for llm
182
- :return: instructions as string
183
- """
184
-
185
- jargon_list = "\n".join(f"- {word}" for word in self.Core.config_file["AI_Jargon"])
186
-
187
- # instructions = f"""
188
- # You are a copywriter. Your task is to write a 'header' and a 'message' as a push notification for a {self.Core.get_instrument()} student that sounds like natural everyday speech: friendly, concise, no jargon, and following the instructions.
189
- # Write a SUPER CASUAL and NATURAL push notification, as if you are chatting over coffee. Avoid odd phrasings.
190
- #
191
- # ABSOLUTE RULE – OVERRIDES EVERYTHING ELSE:
192
- # the header and the message **MUST NOT** contain any banned word or phrases(case-insensitive; singular, plural, verb forms, or their derivatives)
193
- #
194
- # Banned word:
195
- # {jargon_list}
196
- #
197
- # Banned phrases:
198
- # Voice is NOT an instrument, so avoid phrases like below:
199
- # - Your voice is waiting
200
- # - Your voice awaits
201
- # - Your voice needs you
202
- # - Your voice is calling
203
- # - Your voice deserves more
204
- # - Hit the high notes / Hit those notes
205
- # - ...
206
- # """
207
-
208
- instructions = f"""
209
-
210
- You are a copywriter. Your task is to write a 'header' and a 'message' as a push notification for a {self.Core.get_instrument()} student. It is critical that the message sounds like natural, everyday speech: friendly, concise, no jargon, and it must follow the instructions.
211
- Write a SUPER CASUAL and NATURAL push notification, as if you are chatting over coffee. Avoid odd phrasings. The message should sound like something that a {self.Core.get_instrument()} instructor would realistically say to a student in a daily conversation. Here are some examples of things that an instructor would realistically say to a student, to give you a general sense of tone and phrasing:
212
-
213
- Common instructor phrases:
214
- {self.Core.brand_voice}
215
-
216
-
217
- ABSOLUTE RULE – OVERRIDES EVERYTHING ELSE:
218
- the header and the message **MUST NOT** contain any banned word or phrases(case-insensitive; singular, plural, verb forms, or their derivatives)
219
-
220
- Banned word:
221
- {jargon_list}
222
-
223
- Banned phrases:
224
- Voice is NOT an instrument, so avoid phrases like below:
225
- - Your voice is waiting
226
- - Your voice awaits
227
- - Your voice needs you
228
- - Your voice is calling
229
- - Your voice deserves more
230
- - Hit the high notes / Hit those notes
231
- - ...
232
-
233
- """
234
-
235
- banned = """
236
- ABSOLUTE RULE – OVERRIDES EVERYTHING ELSE:
237
- the header and the message **MUST NOT** contain any banned word or phrases(case-insensitive; singular, plural, verb forms, or their derivatives)
238
-
239
- Banned word:
240
- {jargon_list}
241
-
242
- Banned phrases:
243
- Voice is NOT an instrument, so avoid phrases like below:
244
- - Your voice is waiting
245
- - Your voice awaits
246
- - Your voice needs you
247
- - Your voice is calling
248
- - Your voice deserves more
249
- - Hit the high notes / Hit those notes
250
- - ...
251
- """
252
-
253
- return instructions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Messaging_system/MultiMessage.py DELETED
@@ -1,406 +0,0 @@
1
- import json
2
- import random
3
- import time
4
- import pandas as pd
5
- from openai import OpenAI
6
- from Messaging_system.LLMR import LLMR
7
- from Messaging_system.PromptGenerator import PromptGenerator
8
- from Messaging_system.PromptEng import PromptEngine
9
- from Messaging_system.protection_layer import ProtectionLayer
10
- import openai
11
- from Messaging_system.LLM import LLM
12
- from copy import deepcopy
13
- from Messaging_system.Homepage_Recommender import DefaultRec
14
-
15
- class MultiMessage:
16
- def __init__(self, CoreConfig):
17
- """
18
- Class that generates a sequence of messages (multi-step push notifications)
19
- for each user, building on previously generated messages.
20
- """
21
- self.Core = CoreConfig
22
- self.llm = LLM(CoreConfig)
23
- self.defaultRec = DefaultRec(CoreConfig)
24
- self.promptGen=PromptGenerator(self.Core)
25
-
26
- if self.Core.involve_recsys_result:
27
- self.llmr = LLMR(CoreConfig, random=True)
28
-
29
- # ==============================================================
30
- def generate_multi_messages(self, user):
31
- """
32
- Generates multiple messages per user, storing them in a single JSON structure.
33
- The first message is assumed to already exist in user["message"].
34
- Subsequent messages are generated by referencing all previously generated ones.
35
- """
36
- first_message_str = user.get("message", None)
37
- if first_message_str is None:
38
- print("No initial message found; cannot build a multi-message sequence.")
39
- return None
40
-
41
- try:
42
- first_message_dict = json.loads(first_message_str)
43
- except (json.JSONDecodeError, TypeError):
44
- print("Could not parse the first message as JSON. Returning None.")
45
- return None
46
-
47
- message_sequence = [first_message_dict]
48
-
49
- # how many total messages you want (self.Core.subsequence_messages is a dict)
50
- total_configured = len(self.Core.subsequent_examples) + 1 # includes the first
51
- to_generate = max(0, total_configured - 1)
52
-
53
- # figure out DF index once
54
- idx = self._get_user_idx(user)
55
-
56
- for i in range(to_generate):
57
- # The ordinal number of the next message in the sequence (first was #1)
58
- msg_number = i + 2
59
-
60
- # ---- (A) pick the next recommendation BEFORE generating the text if required ----
61
- recommendation_info = content_info = recsys_json = None
62
- zero_tokens = {"prompt_tokens": 0, "completion_tokens": 0}
63
-
64
- if getattr(self.Core, "involve_recsys_result", False):
65
- rec_info, cinfo, rjson = self.select_next_recommendation(user)
66
- recommendation_info, content_info, recsys_json = rec_info, cinfo, rjson
67
-
68
- if recommendation_info is None:
69
- # fallback
70
- content_id = self.defaultRec.recommendation
71
- content_info = self.defaultRec.recommendation_info
72
- recsys_json = self.defaultRec.for_you_url
73
- # Update DF and local user snapshot
74
- user = self._update_user_fields(idx, user,{
75
- "recommendation": recommendation_info,
76
- "recommendation_info": content_info,
77
- "recsys_result": recsys_json
78
- })
79
-
80
- # ---- (B) actually generate the next message; hand it the UPDATED user ----
81
- next_msg_raw = self.generate_next_messages(message_sequence, msg_number, user)
82
- if next_msg_raw is None:
83
- print(f"Could not generate the message for step {msg_number}. Stopping.")
84
- break
85
-
86
- # If you have a protection layer, call it here (omitted for brevity)
87
- criticized_msg = next_msg_raw
88
-
89
- # ---- (C) Parse & validate ----
90
- parsed_output_str = self.parsing_output_message(criticized_msg, user)
91
- if not parsed_output_str:
92
- print(f"Parsing output failed for step {msg_number}. Stopping.")
93
- break
94
-
95
- try:
96
- parsed_output_dict = json.loads(parsed_output_str)
97
- except json.JSONDecodeError:
98
- print(f"Could not parse the new message as JSON for step {msg_number}. Stopping.")
99
- break
100
-
101
- message_sequence.append(parsed_output_dict)
102
-
103
- final_structure = {"messages_sequence": message_sequence}
104
- return json.dumps(final_structure, ensure_ascii=False)
105
-
106
- # --------------------------------------------------------------
107
- def generate_next_messages(self, previous_messages, step, user):
108
- """
109
- Uses only the last two previously generated messages to produce the next message.
110
- Returns a *raw* dictionary (header, message, etc.) from the LLM.
111
-
112
- :param previous_messages: A list of dicts, each containing at least "header" and "message".
113
- :param step: The 1-based index of the message we’re about to generate.
114
- :return: A dictionary from LLM (with 'header' and 'message'), or None if generation fails.
115
- """
116
- # Only keep up to the last two messages
117
- if len(previous_messages) > 2:
118
- context = previous_messages[-2:]
119
- else:
120
- context = previous_messages
121
-
122
- # 1) Build a prompt that includes only those last two messages
123
- prompt = self.generate_prompt(context, step, user)
124
-
125
- # new_prompt = self.engine.prompt_engineering(prompt)
126
-
127
- # 2) Call our existing LLM routine
128
- response_dict = self.llm.get_response(prompt=prompt, instructions=self.llm_instructions())
129
-
130
- return response_dict
131
-
132
- # ===============================================================
133
- def get_examples(self, step):
134
- """
135
- providing examples and instructions
136
- :return:
137
- """
138
-
139
- if self.Core.subsequent_examples is not None:
140
-
141
- instructions = f"""
142
- # ** Example **
143
- Below are some acceptable examples of the voice we want. Create a header and message that follow the same style, tone, vocabulary, and characteristics.
144
- Mimic the example style as much as possible and make it personalized using provided information.
145
-
146
- ### **Good Examples:**
147
-
148
- {self.Core.subsequent_examples[step]}
149
- """
150
- return instructions
151
- else:
152
- return ""
153
-
154
-
155
- # --------------------------------------------------------------
156
- def generate_prompt(self, previous_messages, step, user):
157
- """
158
- Creates a prompt to feed to the LLM, incorporating 3 previously generated messages.
159
-
160
- :param previous_messages: A list of dicts, each containing 'header' and 'message'.
161
- :return: A user-facing prompt string instructing the model to produce a new message.
162
- """
163
- # Build a textual summary of previous messages - last three
164
- recent_messages = previous_messages[-3:]
165
-
166
- previous_text = []
167
- for i, m in enumerate(recent_messages, start=1):
168
- header = m.get("header", "").strip()
169
- body = m.get("message", "").strip()
170
- previous_text.append(f"Message {i}:\n header: {header}\n message: {body}")
171
-
172
- # Combine into a single string
173
- previous_text_str = "\n\n".join(previous_text)
174
-
175
- user_info = self.promptGen.get_user_profile(user=user)
176
- input_context = self.promptGen.input_context()
177
- instructions = self.message_instructions(step)
178
- recommendation_instructions = self.promptGen.recommendations_instructions(user)
179
- output_instructions = self.promptGen.output_instruction()
180
- examples = self.get_examples(step)
181
-
182
-
183
- # Craft the prompt
184
- prompt = f"""
185
- We have previously sent these push notifications to the user and The user has not re-engaged yet:
186
-
187
- ** Previously sent push notifications: **
188
- {previous_text_str}
189
-
190
- ** Objective**:
191
- Write the *next* follow up personalized push notification following the instructions and what we know about the user.
192
-
193
- {input_context}
194
- - **Don't** use any emojis if we used emojis in previous messages.
195
- - The "header" **MUST BE** different from the headers that we previously sent and we should not have similar words, variations and phrases from previous messages.
196
- - The "message" **MUST BE** different from the messages that we previously sent and we should not have similar words, variations and phrases from previous messages.
197
-
198
- {instructions}
199
-
200
- {user_info}
201
-
202
- {recommendation_instructions}
203
-
204
- {examples}
205
-
206
- {output_instructions}
207
- """
208
-
209
- return prompt
210
-
211
- # =============================================================
212
- def message_instructions(self, step):
213
- """
214
- message instructions for each step
215
- :return:
216
- """
217
- instructions= f"""
218
- # **specific instructions**:
219
- - {self.Core.subsequence_messages[step]}
220
- """
221
- return instructions
222
-
223
-
224
- # =============================================================================
225
- def parsing_output_message(self, message, user):
226
- """
227
- Parses the output JSON from the LLM and enriches it with additional content
228
- information if needed (e.g., from recsys). Re-uses the logic from the single-message
229
- pipeline to keep the results consistent.
230
-
231
- :param message: Output JSON *dictionary* from the LLM (with at least "message" and "header").
232
- :param user: The user row dictionary.
233
- :return: A valid JSON string or None if the structure is invalid.
234
- """
235
- if self.Core.involve_recsys_result:
236
- # If recsys is used, fetch recommendation data
237
- output_message = self.fetch_recommendation_data(user, message)
238
- elif self.Core.messaging_mode == "recommend_playlist":
239
- # If recommending a playlist, add the relevant fields
240
- if "playlist_id" in message and "message" in message:
241
- playlist_id = str(message["playlist_id"])
242
- web_url_path = f"https://www.musora.com/{self.Core.brand}/playlist/{playlist_id}"
243
- output_message = {
244
- "header": message.get("header", ""),
245
- "message": message.get("message", ""),
246
- "playlist_id": int(message["playlist_id"]),
247
- "web_url_path": web_url_path,
248
- }
249
- else:
250
- print("LLM output is missing either 'playlist_id' or 'message'.")
251
- return None
252
- else:
253
- # Basic scenario: Only 'header' and 'message' expected
254
- if "message" not in message or "header" not in message:
255
- print("LLM output is missing 'header' or 'message'.")
256
- return None
257
- output_message = {
258
- "header": message["header"],
259
- "message": message["message"]
260
- }
261
-
262
- return json.dumps(output_message, ensure_ascii=False)
263
-
264
- # --------------------------------------------------------------
265
- def fetch_recommendation_data(self, user, message):
266
-
267
- if user["recommendation"] == "for_you":
268
- output_message = {
269
- "header": message.get("header"),
270
- "message": message.get("message"),
271
- "content_id": None,
272
- "web_url_path": user["recsys_result"],
273
- "title": user["recommendation"],
274
- "thumbnail_url": None
275
- }
276
- else:
277
- try:
278
- recommendation_dict = user["recommendation"]
279
- content_id = int(recommendation_dict["content_id"])
280
-
281
- # Extract required fields from found_item
282
- web_url_path = recommendation_dict["web_url_path"]
283
- title = recommendation_dict["title"]
284
- thumbnail_url = recommendation_dict["thumbnail_url"]
285
-
286
- msg = message.get("message")
287
- if isinstance(msg, str):
288
- msg = msg.replace('\\', '').replace('"', '')
289
- else:
290
- msg = str(msg) # or handle it differently if this shouldn't happen
291
- message["message"] = msg
292
-
293
- # message["message"].replace('\\', '').replace('"', '')
294
-
295
- # Add these to the message dict
296
- output_message = {
297
- "header": message.get("header"),
298
- "message": message.get("message"),
299
- "content_id": content_id,
300
- "web_url_path": web_url_path,
301
- "title": title,
302
- "thumbnail_url": thumbnail_url
303
- }
304
- return output_message
305
- except:
306
- raise ValueError(f"Parsed response is not a dict: \n\n {message}")
307
-
308
-
309
- # ===============================================================
310
- def _remove_from_all(self, recsys_dict, cid):
311
- for sec, recs in list(recsys_dict.items()):
312
- if isinstance(recs, list):
313
- recsys_dict[sec] = [r for r in recs if r.get("content_id") != cid]
314
- return recsys_dict
315
-
316
- # ===============================================================
317
- def _lookup_content_info(self, cid):
318
- row = self.Core.content_info[self.Core.content_info["content_id"] == cid]
319
- return row["content_info"].iloc[0] if not row.empty else None
320
-
321
- # ===============================================================
322
-
323
- def select_next_recommendation(self, user):
324
- """
325
- Select next recommendation from the user's current recsys_result.
326
- Returns: content_id, content_info, updated_recsys_json
327
- """
328
- self.llmr.user = user # _get_recommendation expects self.user to be set
329
- cid, cinfo, updated_json, _ = self.llmr._get_recommendation()
330
- return cid, cinfo, updated_json
331
-
332
- # ==============================================================
333
- def _get_user_idx(self, u):
334
- # If it's a Series, its index label is usually the row index
335
- if isinstance(u, pd.Series) and u.name in self.Core.users_df.index:
336
- return u.name
337
- # Otherwise try a stable key like user_id (change if your key is different)
338
- key_col = "user_id" if "user_id" in self.Core.users_df.columns else None
339
- if key_col and key_col in u:
340
- matches = self.Core.users_df.index[self.Core.users_df[key_col] == u[key_col]]
341
- if len(matches):
342
- return matches[0]
343
- # Fallback: try exact row equality (last resort; slower)
344
- try:
345
- return self.Core.users_df.index[self.Core.users_df.eq(pd.Series(u)).all(1)][0]
346
- except Exception:
347
- return None
348
- # =============================================================
349
- def _update_user_fields(self, idx, user, fields: dict):
350
- """Update DF row and return a fresh copy of the user row (Series) with those fields reflected."""
351
- if idx is None:
352
- # no index? just mutate the local dict/Series
353
- for k, v in fields.items():
354
- user[k] = v
355
- return user
356
- for k, v in fields.items():
357
- self.Core.users_df.at[idx, k] = v
358
- return self.Core.users_df.loc[idx]
359
-
360
- # =======================================================================
361
-
362
- def llm_instructions(self):
363
- """
364
- Setting instructions for llm
365
- :return: instructions as string
366
- """
367
-
368
- jargon_list = "\n".join(f"- {word}" for word in self.Core.config_file["AI_Jargon"])
369
- banned_phrases = self.Core.config_file.get(f"{self.Core.brand}_banned_phrases", None)
370
-
371
- instructions = f"""
372
- You are a copywriter. Your task is to write a 'header' and a 'message' as a push notification for a {self.Core.get_instrument()} student. It is critical that the message sounds like natural, everyday speech: friendly, concise, no jargon, and it must follow the instructions.
373
- Write a SUPER CASUAL and NATURAL push notification, as if you are chatting over coffee. Avoid odd phrasings. The message should sound like something that a {self.Core.get_instrument()} instructor would realistically say to a student in a daily conversation.
374
-
375
- """
376
- if self.Core.brand_voice is not None:
377
- instructions += f"""
378
- Here are some examples of things that an instructor would realistically say to a student, to give you a general sense of tone and phrasing:
379
-
380
- Common instructor phrases:
381
- {self.Core.brand_voice}
382
- """
383
-
384
- rules = f"""
385
- ABSOLUTE RULE – OVERRIDES EVERYTHING ELSE:
386
- the header and the message **MUST NOT** contain any banned word or phrases(case-insensitive; singular, plural, verb forms, or their derivatives)
387
- - **important Note:** header **must be** less than {self.Core.config_file["header_limit"]} characters and message **must be less** than {self.Core.config_file["message_limit"]} characters.
388
- - The "header" **MUST BE** different from the headers that we previously sent and we should not have similar words, variations and phrases from previous messages.
389
- - The "message" **MUST BE** different from the messages that we previously sent and we should not have similar words, variations and phrases from previous messages.
390
-
391
-
392
- Banned word:
393
- {jargon_list}
394
-
395
- """
396
- if banned_phrases is not None:
397
- rules += banned_phrases
398
-
399
- final_instructions = f"""
400
- {instructions}
401
- {rules}
402
- """
403
-
404
- return final_instructions
405
-
406
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Messaging_system/MultiMessage_2.py DELETED
@@ -1,412 +0,0 @@
1
- import json
2
- import time
3
- from openai import OpenAI
4
- from Messaging_system.PromptGenerator_2 import PromptGenerator
5
- from Messaging_system.PromptEng import PromptEngine
6
- from Messaging_system.protection_layer import ProtectionLayer
7
- import openai
8
- from Messaging_system.LLM import LLM
9
-
10
- class MultiMessage:
11
- def __init__(self, CoreConfig):
12
- """
13
- Class that generates a sequence of messages (multi-step push notifications)
14
- for each user, building on previously generated messages.
15
- """
16
- self.Core = CoreConfig
17
- self.llm = LLM(CoreConfig)
18
- self.engine = PromptEngine(self.Core)
19
- self.promptGen = PromptGenerator(self.Core)
20
-
21
- # --------------------------------------------------------------
22
- def generate_multi_messages(self, user):
23
- """
24
- Generates multiple messages per user, storing them in a single JSON structure.
25
- The first message is assumed to already exist in user["message"].
26
- Subsequent messages are generated by referencing all previously generated ones.
27
-
28
- :param user: A row (dictionary-like) containing user data and the first message.
29
- :return: JSON string containing the entire sequence of messages
30
- (or None if something goes wrong).
31
- """
32
- # 1) Get the first message if it exists
33
- first_message_str = user.get("message", None)
34
- if not first_message_str:
35
- print("No initial message found; cannot build a multi-message sequence.")
36
- return None
37
-
38
- # Parse the first message as JSON
39
- try:
40
- first_message_dict = json.loads(first_message_str)
41
- except (json.JSONDecodeError, TypeError):
42
- print("Could not parse the first message as JSON. Returning None.")
43
- return None
44
-
45
- # Start our sequence with the first message
46
- message_sequence = [first_message_dict]
47
-
48
- # We'll reuse the same ProtectionLayer
49
- # protect = ProtectionLayer(
50
- # CoreConfig=self.Core
51
- # )
52
-
53
- # If user requested multiple messages, generate the rest
54
- # number_of_messages is the *total* number of messages requested
55
- total_to_generate = len(self.Core.subsequence_messages.keys())
56
-
57
- # Already have the first message, so generate the next (n-1) messages
58
- for step in range(1, total_to_generate + 1):
59
- # 2) Generate the next message referencing all so-far messages
60
- next_msg_raw = self.generate_next_messages(message_sequence, step+1, user)
61
- if not next_msg_raw:
62
- print(f"Could not generate the message for step {step}. Stopping.")
63
- break
64
-
65
- # 3) Pass it through the protection layer
66
- # criticized_msg, tokens_used = protect.criticize(
67
- # message=next_msg_raw,
68
- # user=user
69
- # )
70
- criticized_msg = next_msg_raw
71
-
72
- # Update token usage stats
73
- # self.Core.total_tokens['prompt_tokens'] += tokens_used['prompt_tokens']
74
- # self.Core.total_tokens['completion_tokens'] += tokens_used['completion_tokens']
75
- # self.Core.temp_token_counter += tokens_used['prompt_tokens'] + tokens_used['completion_tokens']
76
-
77
- # 4) Parse & validate the next message (we do the same as the single-message pipeline)
78
- parsed_output_str = self.parsing_output_message(criticized_msg, user)
79
- if not parsed_output_str:
80
- print(f"Parsing output failed for step {step}. Stopping.")
81
- break
82
-
83
- try:
84
- parsed_output_dict = json.loads(parsed_output_str)
85
- except json.JSONDecodeError:
86
- print(f"Could not parse the new message as JSON for step {step}. Stopping.")
87
- break
88
-
89
- # Add this next message to our sequence
90
- message_sequence.append(parsed_output_dict)
91
-
92
- # 5) Return the entire sequence so it can be stored back in the DataFrame or elsewhere
93
- final_structure = {"messages_sequence": message_sequence}
94
- return json.dumps(final_structure, ensure_ascii=False)
95
-
96
- # --------------------------------------------------------------
97
- def generate_next_messages(self, previous_messages, step, user):
98
- """
99
- Uses only the last two previously generated messages to produce the next message.
100
- Returns a *raw* dictionary (header, message, etc.) from the LLM.
101
-
102
- :param previous_messages: A list of dicts, each containing at least "header" and "message".
103
- :param step: The 1-based index of the message we’re about to generate.
104
- :return: A dictionary from LLM (with 'header' and 'message'), or None if generation fails.
105
- """
106
- # Only keep up to the last two messages
107
- if len(previous_messages) > 2:
108
- context = previous_messages[-2:]
109
- else:
110
- context = previous_messages
111
-
112
- # 1) Build a prompt that includes only those last two messages
113
- prompt = self.generate_prompt(context, step, user)
114
-
115
- # new_prompt = self.engine.prompt_engineering(prompt)
116
-
117
- # 2) Call our existing LLM routine
118
- response_dict = self.llm.get_response(prompt=prompt, instructions=self.llm_instructions())
119
-
120
- return response_dict
121
-
122
- # ===============================================================
123
- def get_examples(self, step):
124
- """
125
- providing examples and instructions
126
- :return:
127
- """
128
-
129
- if self.Core.subsequent_examples is not None:
130
-
131
- instructions = f"""
132
- # ** Example **
133
- Below are some acceptable examples of the voice we want. Create a header and message that follow the same style, tone, vocabulary, and characteristics.
134
- Mimic the example style as much as possible and make it personalized using provided information.
135
-
136
-
137
- ### **Good Examples:**
138
-
139
- {self.Core.subsequent_examples[step]}
140
- """
141
- return instructions
142
- else:
143
- return ""
144
-
145
- # --------------------------------------------------------------
146
- def generate_prompt(self, previous_messages, step, user):
147
- """
148
- Creates a prompt to feed to the LLM, incorporating 3 previously generated messages.
149
-
150
- :param previous_messages: A list of dicts, each containing 'header' and 'message'.
151
- :return: A user-facing prompt string instructing the model to produce a new message.
152
- """
153
- # Build a textual summary of previous messages - last three
154
- recent_messages = previous_messages[-3:]
155
-
156
- previous_text = []
157
- for i, m in enumerate(recent_messages, start=1):
158
- header = m.get("header", "").strip()
159
- body = m.get("message", "").strip()
160
- previous_text.append(f"Message {i}: (Header) {header}\n (Body) {body}")
161
-
162
- # Combine into a single string
163
- previous_text_str = "\n\n".join(previous_text)
164
-
165
- user_info = self.promptGen.get_user_profile(user=user)
166
- input_context = self.promptGen.input_context()
167
- output_instructions = self.output_instruction()
168
- general_specifications = self.general_specifications()
169
-
170
- examples = self.get_examples(step)
171
-
172
-
173
- # Craft the prompt
174
- prompt = f"""
175
- We have previously sent these push notifications to the user and The user has not re-engaged yet:
176
-
177
- ** Previous messages **
178
- {previous_text_str}
179
-
180
-
181
- **Objective**
182
- Write the *next* follow up personalized push notification following the instructions and what we know about the user.
183
- {input_context}
184
-
185
- {user_info}
186
-
187
- ### ** General Specifications: **
188
-
189
- {general_specifications}
190
-
191
- # **specific instructions**:
192
- - {self.Core.subsequence_messages[step]}
193
-
194
- {examples}
195
-
196
- {output_instructions}
197
- """
198
-
199
- return prompt
200
- # ===========================================================================
201
- def general_specifications(self):
202
- """
203
- general_specifications
204
- :return: instructions
205
- """
206
-
207
- instructions = """
208
- - Start directly with the message content without greetings or closing phrases.
209
- - Avoid using same or similar words so close together in "message" and "header", and make sure there is no grammar problem.
210
- - message and header **MUST** be different from previous messages in terms of similar words, vocabulary and phrases.
211
- - The message, vocabulary and sentences **MUST** sound like a natural conversation: something that people normally say in daily conversations.
212
-
213
- """
214
- return instructions
215
-
216
- # =============================================================================
217
-
218
- def output_instruction(self):
219
- """
220
- :return: output instructions as a string
221
- """
222
-
223
- # Provide constraints for our next push notification
224
- header_limit = self.Core.config_file.get("header_limit", 50)
225
- message_limit = self.Core.config_file.get("message_limit", 200)
226
-
227
- general_instructions = f"""
228
- - The "header" must be less than {header_limit} character.
229
- - The "message" must be less than {message_limit} character.
230
- - Don't use emoji if we used emoji in our previous messages.,
231
- - if we didn't have emojis in previous message, you are **ONLY ALLOWED** to use {self.Core.get_emoji()} emoji if needed. (ONLY ONCE, and ONLY at the end of header or message).
232
- - Ensure that the output is a valid JSON and not include any text outside the JSON code block.
233
- """
234
-
235
- instructions = f"""
236
- Expected output structure:
237
-
238
- {{
239
- "header": "Generated title",
240
- "message": "Generated message",
241
- }}
242
-
243
- {general_instructions}
244
- """
245
- return instructions
246
-
247
- # --------------------------------------------------------------
248
- def parsing_output_message(self, message, user):
249
- """
250
- Parses the output JSON from the LLM and enriches it with additional content
251
- information if needed (e.g., from recsys). Re-uses the logic from the single-message
252
- pipeline to keep the results consistent.
253
-
254
- :param message: Output JSON *dictionary* from the LLM (with at least "message" and "header").
255
- :param user: The user row dictionary.
256
- :return: A valid JSON string or None if the structure is invalid.
257
- """
258
- if self.Core.involve_recsys_result:
259
- # If recsys is used, fetch recommendation data
260
- output_message = self.fetch_recommendation_data(user, message)
261
- elif self.Core.messaging_mode == "recommend_playlist":
262
- # If recommending a playlist, add the relevant fields
263
- if "playlist_id" in message and "message" in message:
264
- playlist_id = str(message["playlist_id"])
265
- web_url_path = f"https://www.musora.com/{self.Core.brand}/playlist/{playlist_id}"
266
- output_message = {
267
- "header": message.get("header", ""),
268
- "message": message.get("message", ""),
269
- "playlist_id": int(message["playlist_id"]),
270
- "web_url_path": web_url_path,
271
- }
272
- else:
273
- print("LLM output is missing either 'playlist_id' or 'message'.")
274
- return None
275
- else:
276
- # Basic scenario: Only 'header' and 'message' expected
277
- if "message" not in message or "header" not in message:
278
- print("LLM output is missing 'header' or 'message'.")
279
- return None
280
- output_message = {
281
- "header": message["header"],
282
- "message": message["message"]
283
- }
284
-
285
- return json.dumps(output_message, ensure_ascii=False)
286
-
287
- # --------------------------------------------------------------
288
- def fetch_recommendation_data(self, user, message):
289
- """
290
- Extracts recommendation data from user's recsys_result and merges it into the given
291
- message dictionary. Identical to single-message usage.
292
-
293
- :param user: The user row (with 'recsys_result', 'recommendation', etc.).
294
- :param message: Dictionary with at least "header" and "message".
295
- :return: Enriched dict (header, message, content_id, web_url_path, title, thumbnail_url)
296
- """
297
- user_id = user["user_id"]
298
- content_id = int(user["recommendation"])
299
- recsys_json_str = user["recsys_result"]
300
- recsys_data = json.loads(recsys_json_str)
301
-
302
- # Initialize variable to store found item
303
- found_item = None
304
- for category, items in recsys_data.items():
305
- for item in items:
306
- if item.get("content_id") == content_id:
307
- found_item = item
308
- break
309
- if found_item:
310
- break
311
-
312
- if not found_item:
313
- print(f"content_id {content_id} not found in recsys_data for user_id {user_id}.")
314
- return None
315
-
316
- web_url_path = found_item.get("web_url_path")
317
- title = found_item.get("title")
318
- thumbnail_url = found_item.get("thumbnail_url")
319
-
320
- # Construct final dictionary
321
- output_message = {
322
- "header": message.get("header"),
323
- "message": message.get("message", "").replace('\\', '').replace('"', ''),
324
- "content_id": content_id,
325
- "web_url_path": web_url_path,
326
- "title": title,
327
- "thumbnail_url": thumbnail_url
328
- }
329
- return output_message
330
-
331
- # --------------------------------------------------------------
332
- # --------------------------------------------------------------
333
-
334
- def llm_instructions(self):
335
- """
336
- Setting instructions for llm
337
- :return: instructions as string
338
- """
339
-
340
- jargon_list = "\n".join(f"- {word}" for word in self.Core.config_file["AI_Jargon"])
341
-
342
- # instructions = f"""
343
- # You are a copywriter. Your task is to write a 'header' and a 'message' as a push notification for a {self.Core.get_instrument()} student that sounds like natural everyday speech: friendly, concise, no jargon, and following the instructions.
344
- # Write a SUPER CASUAL and NATURAL push notification, as if you are chatting over coffee. Avoid odd phrasings.
345
- #
346
- # ABSOLUTE RULE – OVERRIDES EVERYTHING ELSE:
347
- # the header and the message **MUST NOT** contain any banned word or phrases (case-insensitive; singular, plural, verb forms, or their derivatives)
348
- #
349
- # Banned word:
350
- # {jargon_list}
351
- #
352
- # Banned phrases:
353
- # Voice is NOT an instrument, so avoid phrases like below:
354
- # - Your voice is waiting
355
- # - Your voice awaits
356
- # - Your voice needs you
357
- # - Your voice is calling
358
- # - Your voice deserves more
359
- # - Hit the high notes / Hit those notes
360
- # - ...
361
- #
362
- # """.strip()
363
-
364
-
365
- instructions = f"""
366
-
367
- You are a copywriter. Your task is to write a 'header' and a 'message' as a push notification for a {self.Core.get_instrument()} student. It is critical that the message sounds like natural, everyday speech: friendly, concise, no jargon, and it must follow the instructions.
368
- Write a SUPER CASUAL and NATURAL push notification, as if you are chatting over coffee. Avoid odd phrasings. The message should sound like something that a {self.Core.get_instrument()} instructor would realistically say to a student. Here are some examples of things that an instructor would realistically say to a student, to give you a general sense of tone and phrasing:
369
-
370
- Common instructor phrases:
371
- {self.Core.brand_voice}
372
-
373
- ABSOLUTE RULE – OVERRIDES EVERYTHING ELSE:
374
- the header and the message **MUST NOT** contain any banned word or phrases(case-insensitive; singular, plural, verb forms, or their derivatives)
375
-
376
- Banned word:
377
- {jargon_list}
378
-
379
- Banned phrases:
380
- Voice is NOT an instrument, so avoid phrases like below:
381
- - Your voice is waiting
382
- - Your voice awaits
383
- - Your voice needs you
384
- - Your voice is calling
385
- - Your voice deserves more
386
- - Hit the high notes / Hit those notes
387
- - ...
388
-
389
- """
390
-
391
- banned = """
392
- ABSOLUTE RULE – OVERRIDES EVERYTHING ELSE:
393
- the header and the message **MUST NOT** contain any banned word or phrases(case-insensitive; singular, plural, verb forms, or their derivatives)
394
-
395
- Banned word:
396
- {jargon_list}
397
-
398
- Banned phrases:
399
- Voice is NOT an instrument, so avoid phrases like below:
400
- - Your voice is waiting
401
- - Your voice awaits
402
- - Your voice needs you
403
- - Your voice is calling
404
- - Your voice deserves more
405
- - Hit the high notes / Hit those notes
406
- - ...
407
-
408
- """
409
-
410
-
411
- return instructions
412
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Messaging_system/Permes.py DELETED
@@ -1,202 +0,0 @@
1
- """
2
- the flow of the Program starts from create_personalized_message function
3
- """
4
-
5
-
6
- import time
7
- from tqdm import tqdm
8
- from Messaging_system.DataCollector import DataCollector
9
- from Messaging_system.CoreConfig import CoreConfig
10
- from Messaging_system.LLMR import LLMR
11
- import streamlit as st
12
- from Messaging_system.Message_generator import MessageGenerator
13
- from Messaging_system.PromptGenerator import PromptGenerator
14
- from Messaging_system.SnowFlakeConnection import SnowFlakeConn
15
- from Messaging_system.Homepage_Recommender import DefaultRec
16
-
17
-
18
-
19
- class Permes:
20
- """
21
- LLM-based personalized message generator:
22
- """
23
-
24
- def create_personalize_messages(self, session, users, brand, config_file, openai_api_key,
25
- platform="push", number_of_messages=1, instructionset=None, subsequent_examples=None
26
- , recsys_contents=None, model=None, identifier_column="user_id", segment_info=None,
27
- sample_example=None, number_of_samples=None, message_style=None, involve_recsys_result=False,
28
- messaging_mode="message", ongoing_df=None, personalization=True,
29
- progress_callback=None, segment_name="no_recent_activity"):
30
- """
31
- creating personalized messages for the input users given the parameters for both app and push platform.
32
- :param session: snowflake connection object
33
- :param users: users dataframe
34
- :param brand
35
- :param config_file
36
- :param openai_api_key
37
- :param CTA: call to action for the messages
38
- :param segment_info: common information about the users
39
- :param message_style: style of the message
40
- :param sample_example: a sample for one shot prompting
41
- :return:
42
- """
43
-
44
- # primary processing
45
- users = self.identify_users(users_df=users, identifier_column=identifier_column)
46
-
47
- personalize_message = CoreConfig(session=session,
48
- users_df=users,
49
- brand=brand,
50
- platform=platform,
51
- config_file=config_file)
52
-
53
- personalize_message.set_openai_api(openai_api_key)
54
- personalize_message.set_segment_name(segment_name=segment_name)
55
- personalize_message.set_number_of_messages(number_of_messages=number_of_messages,
56
- instructionset=instructionset,
57
- subsequent_examples=subsequent_examples)
58
-
59
-
60
- if sample_example is not None: # Check if sample_example is not empty
61
- personalize_message.set_sample_example(sample_example)
62
-
63
- if number_of_samples is not None:
64
- personalize_message.set_number_of_samples(number_of_samples)
65
-
66
- if model is not None:
67
- personalize_message.set_llm_model(model)
68
-
69
- if segment_info is not None:
70
- personalize_message.set_segment_info(segment_info)
71
-
72
- if message_style is not None or message_style != "":
73
- personalize_message.set_message_style(message_style)
74
-
75
- if personalization:
76
- personalize_message.set_personalization()
77
-
78
- if involve_recsys_result:
79
- personalize_message.set_messaging_mode("recsys_result")
80
- personalize_message.set_involve_recsys_result(involve_recsys_result)
81
-
82
- # if messaging_mode != "message":
83
- # personalize_message.set_messaging_mode(messaging_mode)
84
-
85
- if recsys_contents:
86
- personalize_message.set_recsys_contents(recsys_contents)
87
-
88
- users_df = self._create_personalized_message(CoreConfig=personalize_message, progress_callback=progress_callback)
89
-
90
- total_prompt_tokens = personalize_message.total_tokens["prompt_tokens"]
91
- total_completion_tokens = personalize_message.total_tokens["completion_tokens"]
92
-
93
- total_cost = self.calculate_cost(total_prompt_tokens, total_completion_tokens, model)
94
-
95
- print(f"Estimated Cost (USD): {total_cost:.5f} ---> Number of messages: {(len(users_df) * number_of_messages)}")
96
- st.write(f"Estimated Cost (USD): {total_cost:.5f} ---> Number of messages: {(len(users_df) * number_of_messages)}")
97
-
98
- scale_price = (total_cost * 1000) / (len(users_df) * number_of_messages)
99
- print(f"Estimated Cost (USD) for 1000 messages: {scale_price}")
100
- st.write(f"Estimated Cost (USD) for 1000 messages: {scale_price}")
101
-
102
- return users_df
103
-
104
- # -----------------------------------------------------
105
- def calculate_cost(self, total_prompt_tokens, total_completion_tokens, model):
106
- input_price, output_price = self.get_model_price(model)
107
-
108
- total_cost = ((total_prompt_tokens / 1000000) * input_price) + (
109
- (total_completion_tokens / 1000000) * output_price) # Cost calculation estimation
110
-
111
- return total_cost
112
-
113
- # ====================================================
114
- def get_model_price(self, model):
115
- """
116
- getting the input price and output price per 1m token for the requested model
117
- :param model:
118
- :return:
119
- """
120
-
121
- input_prices = {
122
- "gpt-4o-mini":0.15,
123
- "gpt-4.1-mini":0.4,
124
- "gpt-5-mini": 0.25,
125
- "gpt-5-nano": 0.05,
126
- "gemini-2.5-flash":0.3,
127
- "gemini-2.0-flash":0.1,
128
- "gemini-2.5-flash-lite":0.1,
129
- "claude-3-5-haiku-latest": 0.8,
130
- "google/gemma-3-27b-instruct/bf-16": 0.15
131
- }
132
-
133
- out_prices = {
134
- "gpt-4o-mini":0.6,
135
- "gpt-4.1-mini":1.6,
136
- "gpt-5-mini": 2,
137
- "gpt-5-nano": 0.4,
138
- "gemini-2.5-flash":2.5,
139
- "gemini-2.0-flash":0.7,
140
- "gemini-2.5-flash-lite":0.4,
141
- "claude-3-5-haiku-latest": 3,
142
- "google/gemma-3-27b-instruct/bf-16":0.3
143
- }
144
-
145
- i_price = input_prices.get(model, 0)
146
- o_price= out_prices.get(model, 0)
147
-
148
- return i_price, o_price
149
-
150
- # ====================================================
151
- def identify_users(self, users_df, identifier_column):
152
- """
153
- specifying the users for identification
154
- :param identifier_column:
155
- :return: updated users
156
- """
157
-
158
- if identifier_column.upper() == "EMAIL":
159
- return users_df
160
- else:
161
- users_df.rename(columns={identifier_column: "USER_ID"}, inplace=True)
162
- return users_df
163
-
164
- # ------------------------------------------------------------------
165
- def _create_personalized_message(self, CoreConfig, step=1, progress_callback=None):
166
- """
167
- main function of the class to flow the work between functions inorder to create personalized messages.
168
- :return: updated users_df with extracted information and personalize messages.
169
- """
170
- # Collecting all the data that we need to personalize messages
171
- datacollect = DataCollector(CoreConfig)
172
- CoreConfig = datacollect.gather_data()
173
-
174
- # generating recommendations for users, if we want to include recommendations in the message
175
- if CoreConfig.involve_recsys_result and CoreConfig.messaging_mode != "message":
176
- Recommender = LLMR(CoreConfig, random=True)
177
- CoreConfig = Recommender.get_recommendations(progress_callback)
178
-
179
- else:
180
- # We only want to generate the message and redirect them to For You section or Homepage
181
- Recommender = DefaultRec(CoreConfig)
182
- CoreConfig = Recommender.get_recommendations()
183
-
184
- # generating proper prompt for each user
185
- prompt = PromptGenerator(CoreConfig)
186
- CoreConfig = prompt.generate_prompts()
187
-
188
- # generating messages for each user
189
- message_generator = MessageGenerator(CoreConfig)
190
- CoreConfig = message_generator.generate_messages(progress_callback)
191
-
192
- # Eliminating rows where we don't have a valid message (null, empty, or whitespace only)
193
- CoreConfig.users_df = CoreConfig.users_df[CoreConfig.users_df["message"].str.strip().astype(bool)]
194
- CoreConfig.checkpoint()
195
-
196
- # closing snowflake connection
197
- # CoreConfig.session.close()
198
-
199
- return CoreConfig.users_df
200
-
201
-
202
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Messaging_system/PromptEng.py DELETED
@@ -1,268 +0,0 @@
1
- """
2
- This is the prompt engineering layer to modifty the prompt for better perfromance
3
- """
4
- import openai
5
- from fontTools.ttLib.tables.ttProgram import instructions
6
- from openai import OpenAI
7
- from Messaging_system.LLM import LLM
8
- import os
9
- import streamlit as st
10
- from google.genai import types
11
- from google import genai
12
-
13
- class PromptEngine:
14
-
15
- def __init__(self, coreconfig):
16
- self.Core=coreconfig
17
- self.llm=LLM(self.Core)
18
-
19
- # ============================================================
20
- def get_credential(self, key):
21
- return os.getenv(key) or st.secrets.get(key)
22
-
23
- # =============================================================
24
- def prompt_engineering(self, prompt):
25
- """
26
- prompt engineering layer to modify the prompt as needed
27
- :param prompt:
28
- :return:
29
- """
30
-
31
- new_prompt = f"""
32
-
33
- Modify below prompt following best prompt engineering methods. return only the new prompt as a text.
34
- modify the prompt and instructions in <original_prompt> tag to maximimize better results by providing the new prompt.
35
-
36
- ### Original prompt
37
-
38
- <original_prompt>
39
-
40
- {prompt}
41
-
42
- </original_prompt>
43
-
44
- output the new prompt as text without any additional information.
45
-
46
- """
47
-
48
- final_prompt = self.get_final_prompt(new_prompt)
49
- return final_prompt
50
- # ===========================================================
51
- def get_final_prompt(self, prompt):
52
-
53
- if self.Core.model in self.Core.config_file["openai_models"]:
54
- final_prompt = self.get_openai_response(prompt)
55
- return final_prompt
56
-
57
- elif self.Core.model in self.Core.config_file["inference_models"]:
58
- final_prompt = self.get_inference_response(prompt)
59
- return final_prompt
60
-
61
- elif self.Core.model in self.Core.config_file["claude_models"]:
62
- final_prompt = self.get_claude_response(prompt, self.llm_instructions())
63
- return final_prompt
64
-
65
- elif self.Core.model in self.Core.config_file["google_models"]:
66
- final_prompt = self.get_gemini_response(prompt)
67
- return final_prompt
68
-
69
- # ============================================================
70
- def llm_instructions(self):
71
-
72
- system_prompt = """
73
- You are a prompt engineer. Rewrite the following prompt to be clearer, more specific, and likely to produce a better response from an LLM following best prompt engineering techniques and styles.
74
- """
75
-
76
- return system_prompt
77
-
78
- # =============================================================
79
- def get_inference_response(self, prompt, max_retries=4):
80
- api_key = self.get_credential("inference_api_key")
81
- client = OpenAI(
82
- base_url="https://api.inference.net/v1",
83
- api_key=api_key,
84
- )
85
-
86
- reasoning = self.Core.reasoning_model
87
- system_prompt = self.llm_instructions()
88
-
89
- for attempt in range(max_retries):
90
- try:
91
- if reasoning:
92
- response = client.chat.completions.create(
93
- model=self.Core.model,
94
- response_format={"type": "text"},
95
- messages=[
96
- {"role": "system", "content": system_prompt},
97
- {"role": "user", "content": prompt}
98
- ],
99
- reasoning_effort="medium",
100
- n=1,
101
- )
102
- else:
103
- response = client.chat.completions.create(
104
- model=self.Core.model,
105
- response_format={"type": "text"},
106
- messages=[
107
- {"role": "system", "content": system_prompt},
108
- {"role": "user", "content": prompt}
109
- ],
110
- n=1,
111
- temperature=self.Core.temperature
112
- )
113
-
114
- tokens = {
115
- 'prompt_tokens': response.usage.prompt_tokens,
116
- 'completion_tokens': response.usage.completion_tokens,
117
- 'total_tokens': response.usage.total_tokens
118
- }
119
-
120
- content = response.choices[0].message.content
121
- output = str(content)
122
-
123
- # validating the JSON
124
- self.Core.total_tokens['prompt_tokens'] += tokens['prompt_tokens']
125
- self.Core.total_tokens['completion_tokens'] += tokens['completion_tokens']
126
- self.Core.temp_token_counter += tokens['total_tokens']
127
- return output
128
-
129
- except openai.APIConnectionError as e:
130
- print("The server could not be reached")
131
- print(e.__cause__) # an underlying Exception, likely raised within httpx.
132
- except openai.RateLimitError as e:
133
- print("A 429 status code was received; we should back off a bit.")
134
- except openai.APIStatusError as e:
135
- print("Another non-200-range status code was received")
136
- print(e.status_code)
137
- print(e.response)
138
-
139
- print("Max retries exceeded. Returning empty response.")
140
- return prompt # returns original prompt if needed
141
-
142
- # ===============================================================
143
- def get_openai_response(self, prompt, max_retries=4):
144
- """
145
- sending the prompt to openai LLM and get back the response
146
- """
147
-
148
- openai.api_key = self.Core.api_key
149
- client = OpenAI(api_key=self.Core.api_key)
150
- reasoning = self.Core.reasoning_model
151
- system_prompt = self.llm_instructions()
152
-
153
- for attempt in range(max_retries):
154
- try:
155
- if reasoning:
156
- response = client.chat.completions.create(
157
- model=self.Core.model,
158
- response_format={"type": "text"},
159
- messages=[
160
- {"role": "system", "content": system_prompt},
161
- {"role": "user", "content": prompt}
162
- ],
163
- reasoning_effort="medium",
164
- n=1,
165
- )
166
- else:
167
- response = client.chat.completions.create(
168
- model=self.Core.model,
169
- response_format={"type": "text"},
170
- messages=[
171
- {"role": "system", "content": system_prompt},
172
- {"role": "user", "content": prompt}
173
- ],
174
- n=1,
175
- temperature=self.Core.temperature
176
- )
177
-
178
- tokens = {
179
- 'prompt_tokens': response.usage.prompt_tokens,
180
- 'completion_tokens': response.usage.completion_tokens,
181
- 'total_tokens': response.usage.total_tokens
182
- }
183
-
184
-
185
- content = response.choices[0].message.content
186
- output = str(content)
187
-
188
- # validating the JSON
189
- self.Core.total_tokens['prompt_tokens'] += tokens['prompt_tokens']
190
- self.Core.total_tokens['completion_tokens'] += tokens['completion_tokens']
191
- self.Core.temp_token_counter += tokens['total_tokens']
192
- return output
193
-
194
- except openai.APIConnectionError as e:
195
- print("The server could not be reached")
196
- print(e.__cause__) # an underlying Exception, likely raised within httpx.
197
- except openai.RateLimitError as e:
198
- print("A 429 status code was received; we should back off a bit.")
199
- except openai.APIStatusError as e:
200
- print("Another non-200-range status code was received")
201
- print(e.status_code)
202
- print(e.response)
203
-
204
- print("Max retries exceeded. Returning empty response.")
205
- return prompt # returns original prompt if needed
206
-
207
- # ==========================================================================
208
- def get_gemini_response(self, prompt, max_retries=4):
209
- """
210
- Send prompt to Google Gemini LLM and get back the response
211
- :param prompt:
212
- :param max_retries:
213
- :return:
214
- """
215
-
216
- client = genai.Client(api_key=self.get_credential("Google_API"))
217
-
218
- for attempt in range(max_retries):
219
- try:
220
- response = client.models.generate_content(
221
- model=self.Core.model,
222
- contents=prompt,
223
- config=types.GenerateContentConfig(
224
- thinking_config=types.ThinkingConfig(thinking_budget=0),
225
- system_instruction=self.llm_instructions(),
226
- temperature=self.Core.temperature,
227
- response_mime_type = "text/plain" # application/json
228
- ))
229
-
230
- output = str(response.text)
231
- return output
232
- except Exception as e:
233
- print(f"Error in attempt {attempt}: {e}")
234
-
235
- # ==========================================================================
236
-
237
-
238
- def get_claude_response(self, prompt, instructions, max_retries=4):
239
- """
240
- send prompt to claude LLM and get back the response
241
- :param prompt:
242
- :param instructions:
243
- :return:
244
- """
245
-
246
- for attempt in range(max_retries):
247
- try:
248
-
249
- message = self.llm.client.messages.create(
250
- model=self.Core.model,
251
- max_tokens=4096,
252
- system = instructions,
253
- messages=[
254
- {"role": "user", "content": prompt}
255
- ],
256
- temperature=self.Core.temperature
257
- )
258
- # Try generating the response
259
- response = message.content[0].text
260
- return response
261
- except Exception as e:
262
- print(f"Error: {e}")
263
-
264
-
265
- print("Max retries exceeded. Returning empty response.")
266
- return prompt # returns original prompt if needed
267
-
268
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Messaging_system/PromptGenerator_2.py DELETED
@@ -1,446 +0,0 @@
1
- """
2
- THis class generate proper prompts for the messaging system
3
- """
4
- import pandas as pd
5
- from tqdm import tqdm
6
- from Messaging_system.PromptEng import PromptEngine
7
-
8
-
9
- class PromptGenerator:
10
-
11
- def __init__(self, Core):
12
- self.Core = Core
13
-
14
- # --------------------------------------------------------------
15
- # --------------------------------------------------------------
16
- def generate_prompts(self):
17
- """
18
- generates a personalized message for each student
19
- :return:
20
- """
21
-
22
- # engine = PromptEngine(self.Core)
23
-
24
- # if we have personalized information about them, we generate a personalized prompt
25
- for idx, row in tqdm(self.Core.users_df.iterrows(), desc="generating prompts"):
26
- # check if we have enough information to generate a personalized message
27
- prompt = self.generate_personalized_prompt(user=row)
28
- # new_prompt = engine.prompt_engineering(prompt)
29
- # self.Core.users_df.at[idx, "prompt"] = new_prompt
30
- self.Core.users_df.at[idx, "prompt"] = prompt
31
- self.Core.users_df.at[idx, "source"] = "AI-generated"
32
-
33
- return self.Core
34
-
35
-
36
- # --------------------------------------------------------------
37
- def safe_get(self, value):
38
- return str(value) if pd.notna(value) else "Not available"
39
-
40
- # ==============================================================
41
- def get_user_profile(self, user):
42
-
43
- # additional_info = self.user_additional_info(user)
44
-
45
- user_info = f"""
46
- ### **User Information:**
47
-
48
- Here is the information about the user:
49
- - The user is a {str(self.Core.get_instrument())} student.
50
- - {self.safe_get(self.Core.segment_info)}
51
-
52
- """
53
-
54
- ## deleted from profile
55
- # first name: {self.safe_get(user.get("first_name"))}
56
- # Weeks since Last interaction:{self.safe_get(user.get("weeks_since_last_interaction"))}
57
- # {self.safe_get(additional_info)}
58
- # ** User
59
- # profile: **
60
- #
61
- # {self.safe_get(user.get("user_info"))}
62
-
63
- return user_info
64
-
65
- # --------------------------------------------------------------
66
- def generate_personalized_prompt(self, user):
67
- """
68
- generate a personalized prompt by putting the information from the user into a template prompt
69
- :return: Personalized prompt (string)
70
- """
71
- input_context = self.input_context()
72
- cta = self.CTA_instructions()
73
-
74
- # if (self.Core.involve_recsys_result and self.Core.messaging_mode !="message") or self.Core.target_content is not None:
75
- # if user["recommendation"] is not None or user["recommendation_info"] is not None:
76
- # recommendations_instructions = self.recommendations_instructions(user=user) + "\n"
77
- # else:
78
- # recommendations_instructions = self.redirect_to_for_you()
79
- # else:
80
- # recommendations_instructions = self.redirect_to_for_you()
81
-
82
-
83
- user_info = self.get_user_profile(user=user)
84
-
85
- # personalize_message_instructions = self.personalize_message_instructions(user)
86
- # {personalize_message_instructions}
87
-
88
- general_instructions = self.message_type_instructions()
89
-
90
- output_instructions = self.output_instruction()
91
-
92
- # task_instructions = self.task_instructions()
93
- # eliminate {task_instructions} and {recommendations_instructions}
94
-
95
- prompt = f"""
96
- {input_context}
97
-
98
- {user_info}
99
-
100
- {cta}
101
-
102
- {general_instructions}
103
-
104
- {output_instructions}
105
- """
106
-
107
- return prompt
108
-
109
- # --------------------------------------------------------------
110
- # --------------------------------------------------------------
111
- def input_context(self):
112
- """
113
- :return: input instructions as a string
114
- """
115
-
116
- context = f"""
117
- Your task is to write a 'header' and a 'message' as a push notification for a {self.Core.get_instrument()} student that sounds like everyday natural speech: friendly, short, no jargon by following the instructions.
118
- The output should sound like something that a {self.Core.get_instrument()} instructor would realistically say to a student in a daily conversation.
119
-
120
- """
121
-
122
- if self.Core.brand_voice is not None:
123
- context += f"""
124
- ** Examples of actual phrases an instructor might say:**
125
-
126
- {self.Core.brand_voice}
127
- """
128
-
129
- return context
130
-
131
- # --------------------------------------------------------------
132
- # --------------------------------------------------------------
133
- def CTA_instructions(self):
134
- """
135
- define CTA instructions
136
- :return: CTA instructions (str)
137
- """
138
-
139
- instructions = f"""
140
-
141
- ### **Main instructions**
142
-
143
- {self.Core.CTA} \n
144
- """
145
-
146
- return instructions
147
-
148
- # --------------------------------------------------------------
149
- # --------------------------------------------------------------
150
- def user_additional_info(self, user):
151
- """
152
- providing additional information given in the input data
153
- :param user:
154
- :return:
155
- """
156
-
157
- if "additional_info" not in user.index:
158
- return ""
159
-
160
- if pd.notna(user["additional_info"]) and user["additional_info"] not in [None, [], {}] and (
161
- not isinstance(user["additional_info"], str) or user["additional_info"].strip()):
162
- additional_info = user["additional_info"]
163
- else:
164
- additional_info = ""
165
-
166
- return additional_info
167
-
168
- # --------------------------------------------------------------
169
- # --------------------------------------------------------------
170
- def recommendations_instructions(self, user):
171
- """
172
- instructions about target recommendation for the user
173
- :param user:
174
- :return:
175
- """
176
-
177
- instructions_for_recsys = f"""
178
- ### ** Recommendations instructions **:
179
- Below is the content that we want to recommend to the user:
180
-
181
- Recommended content: {user["recommendation_info"]}
182
-
183
- - Use the **CONTENT_TITLE** naturally in the message if capable, but do not use the exact title verbatim or put it in quotes.
184
- - Naturally mention the **CONTENT_TYPE** for course, workout, and quicktips if capable.
185
- - If the recommended content has an **Artist** with a known full name, use the ** FULL NAME ** naturally in the message if capable. If only the first name of the Artist is available, ** DO NOT ** use it at all.
186
- """
187
-
188
- # need to adjust
189
- instructions_for_target_content = """
190
- - Considering the information about the user, and the content that we want to recommend, include the **TITLE** inside single quotes, or use the title naturally without the exact title name and quotes if capable.
191
- Naturally mention the **CONTENT_TYPE** for course, workout, quicktips if capable and shortly provide a reasoning why the content is helpful for them.
192
-
193
- **Target recommended Content**:
194
- """
195
-
196
- instructions = ""
197
-
198
- if self.Core.involve_recsys_result:
199
- instructions += f"""
200
- {instructions_for_recsys}
201
- """
202
-
203
- elif self.Core.target_content is not None:
204
- # fetching the information related to the target content from content_table
205
- target_info = self.get_target_content_info(user)
206
- instructions += f"""
207
- {instructions_for_target_content}
208
- {target_info}
209
- """
210
-
211
- return instructions
212
-
213
- # --------------------------------------------------------------
214
- # --------------------------------------------------------------
215
- def get_target_content_info(self, user):
216
- """
217
- fetching information about the target content that we want to recommend to the user
218
- :param user: target user
219
- :return:
220
- """
221
-
222
- # checking that user[self.target_content] contains a content_id:
223
- target_id = int(user[self.Core.target_content])
224
-
225
- try:
226
-
227
- # fetching the data for target content (self.target_content column in user)
228
- content_info_row = self.Core.content_info.loc[self.Core.content_info['content_id'] == target_id]
229
-
230
- text = f"""
231
- **content_id** : {str(content_info_row["content_id"])}"
232
- **content_info** : \n {content_info_row["content_info"]} \n\n"
233
- """
234
- return text
235
- except:
236
- print(f"Target content cannot be found in the content database: content_id = {target_id}")
237
-
238
- # --------------------------------------------------------------
239
- # --------------------------------------------------------------
240
- def personalize_message_instructions(self, user):
241
- """
242
- :return: personalized message instructions as a string
243
- """
244
-
245
-
246
- instructions = """
247
- ### ** Personalized Message Specifications **
248
-
249
- """
250
-
251
- # # Name
252
- # if "first_name" in self.Core.list_of_features and pd.notna(user["first_name"]) and user["first_name"] not in [
253
- # None,
254
- # [],
255
- # {}] and (
256
- # not isinstance(user["first_name"], str) or user["first_name"].strip()):
257
- # instructions += f"""
258
- # - Address the user by their first name in 'header' casually (only first letter capital) to make the message more personal. \n
259
- # """
260
- # else:
261
- # instructions += """
262
- # - If the user's name is not available or invalid (e.g. email), proceed without addressing them by name. \n
263
- # """
264
-
265
- # Birthday reminder
266
- if "birthday_reminder" in self.Core.list_of_features and pd.notna(user["birthday_reminder"]) and user[
267
- "birthday_reminder"] not in [None, [], {}] and (
268
- not isinstance(user["birthday_reminder"], str) or user["birthday_reminder"].strip()):
269
- instructions += """
270
- - **Include a short message to remind them that their birthday is coming up.** \n
271
-
272
- """
273
-
274
- # Additional instructions for input columns
275
- if self.Core.additional_instructions is not None or str(self.Core.additional_instructions).strip() != '':
276
- instructions += str(self.Core.additional_instructions)
277
-
278
- # instructions += self.fire_wall() + "\n"
279
-
280
- final_instructions = f"""
281
- {general_instructions}
282
-
283
- {instructions}
284
-
285
- """
286
-
287
- return final_instructions
288
-
289
- # --------------------------------------------------------------
290
- # --------------------------------------------------------------
291
-
292
- def message_type_instructions(self):
293
- """
294
- create a proper instruction for the message type, regarding the input platform
295
- :return: message instructions as a string
296
- """
297
-
298
- instructions = ""
299
- message_style = self.message_style_instructions()
300
-
301
- if self.Core.platform == "push":
302
- instructions = f"""
303
- ### ** General Specifications: **
304
-
305
- - Start directly with the message content without greetings or closing phrases.
306
- - Avoid using same or similar words so close together in "message" and "header", and make sure there is no grammar problem.
307
- - The message, vocabulary and sentences **MUST** sound like a natural conversation: something that people normally say in daily conversations.
308
- - {message_style}
309
-
310
- """
311
-
312
-
313
- elif self.Core.platform == "app":
314
- instructions = f"""
315
- Message Specifications:
316
- - The message is an **in app notification**.
317
- - ** Keep the First sentence as "header" that should be a short personalized eye catching sentence less than 40 character **.
318
- - ** For the "header", don't use exclamation mark at the end, instead, use a space following with a proper emoji at the end of the "header" (e.g. Great work John 😍) **
319
- - **Keep the message concise and straightforward**.
320
- - **Start directly with the message content**; do not include greetings (e.g., "Hello") or closing phrases.
321
- - Make the message highly **personalized** and **eye-catching**.
322
- - "Personalized" means the user should feel the message is specifically crafted for them and not generic.
323
- - **Every word should contribute to maximizing impact and engagement**.
324
- - {message_style}
325
- """
326
-
327
- return instructions
328
-
329
- # --------------------------------------------------------------
330
- # --------------------------------------------------------------
331
- def message_style_instructions(self):
332
- """
333
- defines the style of the message: e.g. friendly, kind, tone, etc.
334
- :return: style_instructions(str)
335
- """
336
-
337
- if self.Core.message_style is None:
338
- message_style = ""
339
-
340
- else:
341
- message_style = f"""
342
- - {self.Core.message_style}.
343
- """
344
-
345
- return message_style
346
-
347
- # --------------------------------------------------------------
348
- # --------------------------------------------------------------
349
- def output_instruction(self):
350
- """
351
- :return: output instructions as a string
352
- """
353
-
354
- example_output = self.example_output()
355
- general_instructions = f"""
356
- - The "header" must be less than 30 character.
357
- - The "message" must be less than 100 character.
358
- - Preserve special characters and emojis in the message, you are **ONLY ALLOWED**allowed to use {self.Core.get_emoji()} emoji if needed, ONLY ONCE, and ONLY at the end of header or message).
359
- - Ensure that the output is a valid JSON and not include any text outside the JSON code block.
360
- """
361
-
362
- instructions = f"""
363
- Expected output structure:
364
-
365
- {{
366
- "header": "Generated title",
367
- "message": "Generated message",
368
- }}
369
-
370
- {general_instructions}
371
- """
372
-
373
- output_instructions = f"""
374
- ### **Output instructions**:
375
-
376
- {example_output}
377
- {instructions}
378
- """
379
-
380
- return output_instructions
381
-
382
- # --------------------------------------------------------------
383
- # --------------------------------------------------------------
384
- def example_output(self):
385
- """
386
- returns an example output (1-shot) to guide the LLM
387
- :return: example output
388
- """
389
-
390
- if self.Core.sample_example is None:
391
-
392
- return ""
393
-
394
- else:
395
- # one shot prompting
396
- example = f"""
397
- Below are some acceptable examples. Create a header and message that follows the same style, tone, vocabulary, and characteristics of followed examples.
398
-
399
- ### **Good Examples:**
400
- {self.Core.sample_example}
401
- """
402
-
403
- return example
404
-
405
- # --------------------------------------------------------------
406
- # --------------------------------------------------------------
407
-
408
- def task_instructions(self):
409
- """
410
- creating instructions for specifying the tasks
411
- :return:
412
- """
413
-
414
- if self.Core.involve_recsys_result and self.Core.messaging_mode != "message":
415
- recsys_task = """
416
- - Create a perfect message and the header following the instructions, using the user's information and the content that we want to recommend.
417
- - Use the instructions to include the recommended content in the message.
418
-
419
- """
420
- else:
421
- recsys_task = ""
422
-
423
- message_task = """
424
- - Create a header and a message considering the information and instructions mentioned. Your output format should be based on **Output instructions**."""
425
-
426
- instructions = f"""
427
- ### Tasks:
428
- {recsys_task}
429
- {message_task}
430
- """
431
-
432
- return instructions
433
-
434
- # =======================================================
435
- def redirect_to_for_you(self):
436
- """
437
- instructions to redirect the user to For you section or homepage
438
- :return:
439
- """
440
-
441
- instructions = f"""
442
- ** Note: **
443
- We don't recommend a specific content and by opening the message, the user will be redirected to a page that contains personalized recommendations for them.
444
- \n
445
- """
446
- return instructions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Messaging_system/SnowFlakeConnection.py DELETED
@@ -1,262 +0,0 @@
1
- """
2
- This class create a connection to Snowflake, run queries (read and write)
3
- """
4
- import json
5
-
6
- import numpy as np
7
- import pandas as pd
8
- from snowflake.snowpark import Session
9
- from sympy.strategies.branch import condition
10
-
11
-
12
- class SnowFlakeConn:
13
- def __init__(self, session, brand):
14
- self. session = session
15
- self.brand = brand
16
-
17
- self.final_columns = ['user_id', "email", "user_info", "permission", "expiration_date", "recsys_result", "message", "brand", "recommendation", "segment_name", "timestamp"]
18
-
19
- self.campaign_id = {
20
- "singeo": 460,
21
- "pianote": 457,
22
- "guitareo": 458,
23
- "drumeo": 392
24
- }
25
-
26
- # ---------------------------------------------------------------
27
- # ---------------------------------------------------------------
28
- def run_read_query(self, query, data):
29
- """
30
- Executes a SQL query on Snowflake that fetch the data
31
- :return: Pandas dataframe containing the query results
32
- """
33
-
34
- # Connect to Snowflake
35
- try:
36
- dataframe = self.session.sql(query).to_pandas()
37
- dataframe.columns = dataframe.columns.str.lower()
38
- print(f"reading {data} table successfully")
39
- return dataframe
40
- except Exception as e:
41
- print(f"Error in creating/updating table: {e}")
42
-
43
- # ---------------------------------------------------------------
44
- # ---------------------------------------------------------------
45
- def is_json_parsed_to_collection(self, s):
46
- try:
47
- parsed = json.loads(s)
48
- return isinstance(parsed, (dict, list))
49
- except:
50
- return False
51
- # ---------------------------------------------------------------
52
- # ---------------------------------------------------------------
53
- def store_df_to_snowflake(self, table_name, dataframe, database="ONLINE_RECSYS", schema="GENERATED_DATA"):
54
- """
55
- Executes a SQL query on Snowflake that write the preprocessed data on new tables
56
- :param query: SQL query string to be executed
57
- :return: None
58
- """
59
-
60
- try:
61
- self.session.use_database(database)
62
- self.session.use_schema(schema)
63
-
64
- dataframe = dataframe.reset_index(drop=True)
65
- dataframe.columns = dataframe.columns.str.upper()
66
-
67
- self.session.write_pandas(df=dataframe,
68
- table_name=table_name.strip().upper(),
69
- auto_create_table=True,
70
- overwrite=True,
71
- use_logical_type=True)
72
- print(f"Data inserted into {table_name} successfully.")
73
-
74
- except Exception as e:
75
- print(f"Error in creating/updating/inserting table: {e}")
76
-
77
- # ---------------------------------------------------------------
78
- # ---------------------------------------------------------------
79
- def get_data(self, data, list_of_ids=None):
80
- """
81
- valid Data is = {users, contents, interactions, recsys, popular_contents}
82
- :param data:
83
- :return:
84
- """
85
- valid_data = {'users', 'contents', 'interactions', 'recsys', 'popular_contents'}
86
-
87
- if data not in valid_data:
88
- raise ValueError(f"Invalid data type: {data}")
89
-
90
- # Construct the method name based on the input
91
- method_name = f"_get_{data}"
92
-
93
- # Retrieve the method dynamically
94
- method = getattr(self, method_name, None)
95
- if method is None:
96
- raise NotImplementedError(f"The method {method_name} is not implemented.")
97
-
98
- query = method(list_of_ids)
99
- data = self.run_read_query(query, data)
100
-
101
- return data
102
- # ---------------------------------------------------------------
103
- # ---------------------------------------------------------------
104
- def _get_contents(self, list_of_ids=None):
105
- query = f"""
106
- select CONTENT_ID, CONTENT_TYPE, CONTENT_PROFILE as content_info --, CONTENT_PROFILE_VECTOR
107
- from ONLINE_RECSYS.VECTOR_DB.VECTORIZED_CONTENT
108
- where BRAND = '{self.brand}'
109
- """
110
- return query
111
- # ---------------------------------------------------------------
112
- # ---------------------------------------------------------------
113
- def _get_users(self, list_of_ids=None):
114
-
115
- if list_of_ids is not None:
116
- ids_str = "(" + ", ".join(map(str, list_of_ids)) + ")"
117
- condition = f"AND USER_ID in {ids_str}"
118
- else :
119
- condition = ""
120
-
121
- query = f"""
122
- select USER_ID, BRAND, FIRST_NAME, BIRTHDAY, TIMEZONE, EMAIL, CURRENT_TIMESTAMP() AS TIMESTAMP, DIFFICULTY, SELF_REPORT_DIFFICULTY, USER_PROFILE as user_info, PERMISSION, EXPIRATION_DATE,
123
- DATEDIFF(
124
- day,
125
- CURRENT_DATE(),
126
- CASE
127
- WHEN DATE_FROM_PARTS(YEAR(CURRENT_DATE()), EXTRACT(MONTH FROM BIRTHDAY), EXTRACT(DAY FROM BIRTHDAY)) < CURRENT_DATE()
128
- THEN DATE_FROM_PARTS(YEAR(CURRENT_DATE()) + 1, EXTRACT(MONTH FROM BIRTHDAY), EXTRACT(DAY FROM BIRTHDAY))
129
- ELSE DATE_FROM_PARTS(YEAR(CURRENT_DATE()), EXTRACT(MONTH FROM BIRTHDAY), EXTRACT(DAY FROM BIRTHDAY))
130
- END) AS birthday_reminder
131
- from ONLINE_RECSYS.PREPROCESSED.USERS
132
- where BRAND = '{self.brand}' {condition}
133
- """
134
- return query
135
- # ---------------------------------------------------------------
136
- # ---------------------------------------------------------------
137
- def _get_interactions(self, list_of_ids=None):
138
-
139
- if list_of_ids is not None:
140
- ids_str = "(" + ", ".join(map(str, list_of_ids)) + ")"
141
- condition = f"AND USER_ID in {ids_str}"
142
- else :
143
- condition = ""
144
-
145
- query = f"""
146
- WITH latest_interactions AS(
147
- SELECT
148
- USER_ID, CONTENT_ID, CONTENT_TYPE, EVENT_TEXT, TIMESTAMP,
149
- ROW_NUMBER() OVER(PARTITION BY USER_ID ORDER BY TIMESTAMP DESC) AS rn
150
- FROM ONLINE_RECSYS.PREPROCESSED.RECSYS_INTEACTIONS
151
- WHERE BRAND = '{self.brand}' AND EVENT_TEXT IN('Video Completed', 'Video Playing') {condition})
152
-
153
- SELECT i.USER_ID, i.CONTENT_ID, i.CONTENT_TYPE, c.content_profile as last_completed_content, i.EVENT_TEXT, i.TIMESTAMP, DATEDIFF('week', i.TIMESTAMP, CURRENT_TIMESTAMP) AS weeks_since_last_interaction
154
- FROM latest_interactions i
155
- LEFT JOIN
156
- ONLINE_RECSYS.VECTOR_DB.VECTORIZED_CONTENT c ON c.CONTENT_ID = i.CONTENT_ID
157
- WHERE rn = 1;
158
- """
159
- return query
160
- # ---------------------------------------------------------------
161
- # ---------------------------------------------------------------
162
- def _get_recsys(self, list_of_ids=None):
163
-
164
- if list_of_ids is not None:
165
- ids_str = "(" + ", ".join(map(str, list_of_ids)) + ")"
166
- condition = f"WHERE USER_ID in {ids_str}"
167
- else :
168
- condition = ""
169
-
170
- recsys_col = f"{self.brand}_recsys_v3"
171
- query = f"""
172
- select USER_ID, {recsys_col} as recsys_result
173
- from RECSYS_V3.RECSYS_CIO.RECSYS_V3_CUSTOMER_IO_OLD
174
- {condition}
175
- """
176
- return query
177
- # ---------------------------------------------------------------
178
- # ---------------------------------------------------------------
179
- def _get_popular_contents(self, list_of_ids=None):
180
-
181
- query = f"""
182
- select POPULAR_CONTENT
183
- from RECSYS_V3.RECSYS_CIO.POPULAR_CONTENT_CUSTOMER_IO_OLD
184
- where brand = '{self.brand.lower()}'
185
- """
186
-
187
- return query
188
- # ---------------------------------------------------------------
189
- # ---------------------------------------------------------------
190
- def extract_id_from_email(self, emails):
191
- """
192
- extracting user_ids from emails
193
- :param unique_emails:
194
- :return:
195
- """
196
-
197
- email_list_str = ', '.join(f"'{email}'" for email in emails)
198
- query = f"""
199
- SELECT id as USER_ID, email as EMAIL
200
- FROM STITCH.MUSORA_ECOM_DB.USORA_USERS
201
- WHERE email IN ({email_list_str})
202
- """
203
-
204
- user_ids_df = self.run_read_query(query, data="User_ids")
205
- return user_ids_df
206
- # ---------------------------------------------------------------
207
- # ---------------------------------------------------------------
208
-
209
- def adjust_dataframe(self, dataframe):
210
- """
211
- Filter dataframe to only include the columns in self.final_columns.
212
- Add any missing columns with None values.
213
- Ensure the final order is consistent with self.final_columns.
214
- """
215
- # Work with a copy so that we don't modify the original input
216
- final_df = dataframe.copy()
217
-
218
- # Normalize column names to lower-case for matching (if needed)
219
- final_df.columns = final_df.columns.str.lower()
220
- expected_cols = [col.lower() for col in self.final_columns]
221
-
222
- # Keep only those columns in the expected list
223
- available = [col for col in final_df.columns if col in expected_cols]
224
- final_df = final_df[available]
225
-
226
- # Add missing columns with None values
227
- for col in expected_cols:
228
- if col not in final_df.columns:
229
- final_df[col] = None
230
-
231
- # Reorder the columns to the desired order
232
- final_df = final_df[expected_cols]
233
-
234
- # If you need the column names to match exactly what self.final_columns provides (case-sensitive),
235
- # you can rename them accordingly.
236
- rename_mapping = {col.lower(): col for col in self.final_columns}
237
- final_df.rename(columns=rename_mapping, inplace=True)
238
-
239
- return final_df
240
- # ==============================================================
241
- def get_users_in_campaign(self, brand):
242
- """
243
- creating a query to fetch requested users
244
- :param brand:
245
- :return:
246
- """
247
-
248
- camp_id = self.campaign_id[brand]
249
-
250
- query = f"""
251
- SELECT email
252
- FROM CUSTOMER_IO_DATA_SYNCING.ANALYTICS.vw_user_campaign_delivery_channel
253
- where campaign_id = {str(camp_id)} AND email is not NULL AND TRIM(email) <> ''
254
- """
255
-
256
- users_df = self.run_read_query(query, data=f"{brand}_campaign")
257
- return users_df
258
-
259
- # ---------------------------------------------------------------
260
- # ---------------------------------------------------------------
261
- def close_connection(self):
262
- self.session.close()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Messaging_system/context_validator.py DELETED
@@ -1,302 +0,0 @@
1
- import json
2
- import time
3
- import openai
4
- from openai import OpenAI
5
- from tqdm import tqdm
6
-
7
-
8
-
9
- class Validator:
10
- """
11
- LLM-based personalized message generator:
12
- """
13
-
14
- def __init__(self, api_key):
15
-
16
- # will be set by the user
17
- self.validator_instructions = None
18
- self.api_key = api_key
19
- self.model = "gpt-4o-mini"
20
-
21
-
22
- # to trace the number of tokens and estimate the cost if needed
23
- self.temp_token_counter = 0
24
- self.total_tokens = {
25
- 'prompt_tokens': 0,
26
- 'completion_tokens': 0,
27
- }
28
-
29
- # -------------------------------------------------------------------
30
- def set_openai_api(self, openai_key):
31
- """
32
- Setting template with placeholders manually connection
33
- :param template: a string with placeholders
34
- :return:
35
- """
36
- self.api_key = openai_key
37
-
38
- # -------------------------------------------------------------------
39
- def context_prompt(self):
40
-
41
- instructions = """
42
- You are a text moderator and you should parse the input text. based on below instructions. you should decide if
43
- the input text is a valid input or not.
44
- """
45
- return instructions
46
-
47
- # -------------------------------------------------------------------
48
- def initial_prompt(self):
49
-
50
- instructions = """You are a helpful assistant at Musora, an online music education platform that helps users
51
- learn music. Our students will provide user-generated-context such as comments and forums on engaging musical
52
- contents like songs, lessons, workouts or other type of musical and educational content. Your task is
53
- to determine if the input text provided by our student is a valid text or not.
54
-
55
- """
56
- return instructions
57
-
58
- # -------------------------------------------------------------------
59
- def set_validator_instructions(self, valid_instructions="", invalid_instructions=""):
60
-
61
- instructions = f"""
62
- ** The text is INValid if it falls into any of the below criteria **:
63
-
64
- {invalid_instructions}
65
- {self.fire_wall()}
66
- --------------------------
67
-
68
- Please ensure that the text meets the following criteria to be considered **valid**:
69
-
70
- {valid_instructions}
71
- {self.default_valid_text()}
72
- """
73
-
74
- self.validator_instructions = instructions
75
-
76
- # -------------------------------------------------------------------
77
- def output_instruction(self):
78
- """
79
- :return: output instructions as a string
80
- """
81
-
82
- output_instructions = """
83
- ** Task: **
84
- - **Based on the input text, the music educational nature of our contents, and instructions about validating the student's input, check if the text is a valid input or not.**
85
- - **Your output should be strictly "True" if it is a Valid text, or "False" if it not a valid text.**
86
- - **You should provide the output in JSON format where the key is "valid"** - **Do not include any text outside the JSON code block**.
87
-
88
- Your response should be in JSON format with the following structure:
89
-
90
- example of a VALID text:
91
-
92
- {
93
- "valid": "True",
94
- }
95
-
96
- Example of an INVALID text:
97
-
98
- {
99
- "valid": "False",
100
- }
101
- """
102
- return output_instructions
103
-
104
- # -------------------------------------------------------------------
105
- def get_llm_response(self, prompt, max_retries=3):
106
- """
107
- sending the prompt to the LLM and get back the response
108
- """
109
-
110
- openai.api_key = self.api_key
111
- instructions = self.context_prompt()
112
- client = OpenAI(api_key=self.api_key)
113
-
114
- for attempt in range(max_retries):
115
- try:
116
- response = client.chat.completions.create(
117
- model=self.model,
118
- response_format={"type": "json_object"},
119
- messages=[
120
- {"role": "system", "content": instructions},
121
- {"role": "user", "content": prompt}
122
- ],
123
- max_tokens=500,
124
- n=1,
125
- temperature=0.7
126
- )
127
-
128
- tokens = {
129
- 'prompt_tokens': response.usage.prompt_tokens,
130
- 'completion_tokens': response.usage.completion_tokens,
131
- 'total_tokens': response.usage.total_tokens
132
- }
133
-
134
- try:
135
- content = response.choices[0].message.content
136
- # Extract JSON code block
137
- output = json.loads(content)
138
-
139
- if 'valid' not in output:
140
- print(f"'valid' key is missing in response on attempt {attempt + 1}. Retrying...")
141
- continue # Continue to next attempt
142
-
143
- else:
144
- if output["valid"] not in ["True", "False"]:
145
- print(f"True or False value missing in response on attempt {attempt + 1}. Retrying...")
146
- continue
147
-
148
- # validating the JSON
149
- self.total_tokens['prompt_tokens'] += tokens['prompt_tokens']
150
- self.total_tokens['completion_tokens'] += tokens['completion_tokens']
151
- self.temp_token_counter += tokens['prompt_tokens'] + tokens['completion_tokens']
152
- return output
153
-
154
- except json.JSONDecodeError:
155
- print(f"Invalid JSON from LLM on attempt {attempt + 1}. Retrying...")
156
-
157
- except openai.APIConnectionError as e:
158
- print("The server could not be reached")
159
- print(e.__cause__) # an underlying Exception, likely raised within httpx.
160
- except openai.RateLimitError as e:
161
- print("A 429 status code was received; we should back off a bit.")
162
- except openai.APIStatusError as e:
163
- print("Another non-200-range status code was received")
164
- print(e.status_code)
165
- print(e.response)
166
-
167
- print("Max retries exceeded. Returning empty response.")
168
- return [], {}
169
-
170
- # -------------------------------------------------------------------
171
- def create_validation_prompt(self, input_text):
172
-
173
- """
174
- creating the proper prompt and instructions around the input text
175
- :param input_text:
176
- :return:
177
- """
178
-
179
- prompt = f"""
180
- {self.initial_prompt()}
181
-
182
- **Input text provided by the Student:**
183
- {input_text}
184
-
185
- {self.validator_instructions}
186
- {self.output_instruction()}
187
- """
188
-
189
- return prompt
190
-
191
- # -------------------------------------------------------------------
192
- def validate_dataframe(self, dataframe, target_column, progress_callback=None):
193
- """
194
- generating the prompt for every user based on their text input, generating the results (True or False),
195
- updating and returning the input dataframe. :return:
196
- """
197
- dataframe["valid"] = None
198
- start_time = time.time()
199
- total_users = len(dataframe)
200
-
201
- for progress, (idx, row) in enumerate(tqdm(dataframe.iterrows(), desc="generating prompts")):
202
-
203
- if progress_callback is not None:
204
- progress_callback(progress, total_users)
205
- input_text = row[target_column]
206
- prompt = self.create_validation_prompt(input_text)
207
- response = self.get_llm_response(prompt)
208
- dataframe.at[idx, "valid"] = response["valid"]
209
-
210
- current_time = time.time()
211
- delta = current_time - start_time
212
-
213
- # Check token limits
214
- if self.temp_token_counter > 195000 and delta >= 60: # Using a safe margin
215
- print("Sleeping for 60 seconds to respect the token limit...")
216
- # reset the token counter
217
- self.temp_token_counter = 0
218
- start_time = time.time()
219
- time.sleep(60) # Sleep for a minute before making new requests
220
-
221
- return dataframe
222
-
223
- # -------------------------------------------------------------------
224
- def validate_text(self, text):
225
- """
226
- generating the prompt for every user based on their text input, generating the results (True or False),
227
- updating and returning the input dataframe. :return:
228
- """
229
-
230
- prompt = self.create_validation_prompt(text)
231
- response = self.get_llm_response(prompt)
232
- return response["valid"]
233
-
234
- # -------------------------------------------------------------------
235
- def fire_wall(self):
236
- """
237
- Provide explicit instructions to ensure that sensitive or inappropriate information is identified in the text.
238
- :return: string
239
- """
240
- fire_wall = """
241
- As a content moderator, please review the text and ensure it does not contain any of the following:
242
-
243
- **Disallowed Content Categories:**
244
-
245
- 1. **Sensitive Personal Information**: personal data such as phone numbers, email addresses, or other identifying information.
246
-
247
- 2. **Offensive or Discriminatory Language**: Hate speech, harassment, bullying, or any derogatory remarks targeting individuals or groups based on race, ethnicity, nationality, religion, gender, sexual orientation, age, disability, or any other characteristic.
248
-
249
- 3. **Sensitive Topics**: Content that discusses or promotes extremist views, political propaganda, or divisive religious beliefs in a manner that could incite hostility.
250
-
251
- 4. **Removed or Restricted Content**: Mentions of songs, media, or features that have been removed or are restricted on our platform.
252
-
253
- 5. **Technical Issues or Bugs**: Any references to glitches, errors, crashes, or other technical problems experienced on the platform.
254
-
255
- 6. ** Language that is excessively angry, aggressive, or includes profanity or vulgar expressions. **
256
-
257
- 7. **Privacy Violations**: Sharing of confidential information or content that infringes on someone's privacy rights.
258
-
259
- 8. **Intellectual Property Violations**: Unauthorized use or distribution of copyrighted material.
260
-
261
- 9. **Defamation**: False statements presented as facts that harm the reputation of an individual or organization.
262
-
263
- **Examples of Invalid Content:**
264
-
265
- - "This app is useless and the developers are idiots!"
266
- - "They removed my favorite song; it sucks"
267
- - "People who follow [specific religion] are all wrong and should be banned."
268
-
269
- If the text contains any of the above issues, please flag it as invalid.
270
-
271
- """
272
- return fire_wall
273
-
274
- # -------------------------------------------------------------------
275
- def default_valid_text(self):
276
- """
277
- Provide explicit instructions to ensure that the text is appropriate and meets the content guidelines.
278
- :return: string
279
- """
280
- valid_text = """
281
-
282
- **Allowed Content Criteria:**
283
-
284
- 1. **Positive Sentiment**: The text should be encouraging, uplifting, or convey a positive emotion.
285
-
286
- 2. **Constructive and Helpful**: Provides valuable insights, advice, or shares personal experiences that could
287
- benefit others. This can be sharing struggling in practices, challenges or other type of difficulties that might need our attention.
288
-
289
- 3. **Respectful Language**: Uses polite and appropriate language, fostering a friendly and inclusive community environment.
290
-
291
- **Examples of Valid Content:**
292
-
293
- - "I love how this app helps me discover new music every day!"
294
- - "Here's a tip: creating themed playlists can really enhance your listening experience."
295
- - "I had a great time using this feature during my commute today."
296
- - "This session is so challenging for me and I'm feeling so much pain in my foot, might go over the workout couple more"
297
- """
298
- return valid_text
299
-
300
-
301
-
302
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Messaging_system/protection_layer.py DELETED
@@ -1,143 +0,0 @@
1
- """
2
- protection layer on top of the messaging system to make sure the messages are as expected.
3
- """
4
-
5
- from Messaging_system.LLM import LLM
6
-
7
-
8
- # -----------------------------------------------------------------------
9
-
10
- class ProtectionLayer:
11
- """
12
- Protection layer to double check the generated message:
13
- """
14
-
15
- def __init__(self, CoreConfig):
16
-
17
- self.Core = CoreConfig
18
-
19
- self.llm = LLM(CoreConfig)
20
- # to trace the number of tokens and estimate the cost if needed
21
- self.total_tokens = {
22
- 'prompt_tokens': 0,
23
- 'completion_tokens': 0,
24
- }
25
-
26
- # --------------------------------------------------------------
27
- # ----------------------------------------------------------------------
28
- def llm_instructions(self) -> str:
29
- """
30
- System-level directions for the *second-pass* LLM that either approves
31
- or fixes a push-notification draft produced earlier.
32
- """
33
-
34
- jargon_list = "\n".join(f"- {word}" for word in self.Core.config_file["AI_Jargon"])
35
-
36
- return f"""
37
- You are a friendly copy-writer. **Approve the candidate JSON as-is, or
38
- return a corrected version that obeys every rule below.**
39
-
40
- ABSOLUTE RULES (override everything else)
41
- • Output **only** valid JSON with exactly two keys: "header" and "message".
42
- • Capitalize the **first** word in each value.
43
- • Keep the original if it already passes every rule.
44
-
45
- STYLE
46
- • Sound like everyday speech: casual, friendly, concise.
47
- • No greetings or sign-offs.
48
-
49
- JARGON / BANNED CONTENT
50
- • Never use any of these words (case-insensitive, all forms):
51
- {jargon_list}
52
-
53
- • Never use or paraphrase the following phrases (Voice ≠ instrument):
54
- - Your voice is waiting
55
- - Your voice awaits
56
- - Your voice needs you
57
- - Your voice is calling
58
- - Your voice deserves more
59
- - Hit the high notes / Hit those notes
60
- """
61
-
62
- # ----------------------------------------------------------------------
63
- def get_general_rules(self) -> str:
64
- """
65
- Validation rules applied to both 'header' and 'message'.
66
- """
67
- rules = """
68
- - No two consecutive sentences may both end with '!'. Change one to '.'.
69
- - Begin directly with content—no greetings or closings.
70
- - Fix any grammar or spelling errors.
71
- - Preserve the exact JSON structure: {"header":"...", "message":"..."}.
72
- - Remove words that imply recency (e.g. “new”, “latest”, “upcoming”).
73
- - Capitalize the first word and any proper noun.
74
- - Would a friendly music instructor casually say such message? If not, rewrite as they would!
75
- - If no rule is violated, return the JSON unchanged.
76
- """
77
-
78
- return rules
79
-
80
- # ----------------------------------------------------------------------
81
- def output_instruction(self) -> str:
82
- """
83
- Explicit output contract (shown last so it’s freshest in token memory).
84
- """
85
- return """
86
- **Return ONLY JSON, nothing else**
87
-
88
- {
89
- "header": "Header text here",
90
- "message": "Message text here"
91
- }
92
-
93
- Constraints
94
- - "header" ≤ 30 characters (including spaces & punctuation)
95
- - "message" ≤ 100 characters
96
- - Do NOT add, remove, or rename keys.
97
- """
98
-
99
- # ----------------------------------------------------------------------
100
- def get_context(self) -> str:
101
- """
102
- High-level context for the LLM.
103
- """
104
- return (
105
- "We generated a personalized push-notification. "
106
- "Please check it against the rules and fix only what is necessary."
107
- )
108
-
109
- # ----------------------------------------------------------------------
110
- def generate_prompt(self, message: str, user: dict) -> str:
111
- """
112
- Combine all pieces into the final prompt sent to the validator LLM.
113
- """
114
-
115
- prompt = f"""
116
- ### Context
117
- {self.get_context()}
118
-
119
- ### Original JSON
120
- {message}
121
-
122
- ### Rules
123
- {self.get_general_rules()}
124
-
125
- ### Output Contract
126
- {self.output_instruction()}
127
- """
128
- return prompt
129
-
130
- # --------------------------------------------------------------
131
- def criticize(self, message, user):
132
- """
133
- criticize the llm response by using additional layer of query
134
- :return: updated users_df with extracted information and personalize messages.
135
- """
136
-
137
- prompt = self.generate_prompt(message, user)
138
- response = self.llm.get_response(prompt=prompt, instructions=self.llm_instructions())
139
-
140
- return response, self.total_tokens
141
-
142
-
143
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Messaging_system/sending_time.py DELETED
@@ -1,69 +0,0 @@
1
- """
2
- calculating sending time for each individual user
3
- """
4
-
5
- import numpy as np
6
- from snowflake.snowpark import Session
7
- import json
8
- import pandas as pd
9
- import os
10
- from dotenv import load_dotenv
11
- load_dotenv()
12
-
13
- class PersonalizedTime:
14
- """
15
- This module will calcualte the best tiume to send for each individual users
16
- """
17
-
18
- def calculate_sending_time(self):
19
-
20
- # fetching data
21
- session = self.snowflake_connection()
22
- query = self.fetch_users_time(session)
23
-
24
-
25
- def fetch_users_time(self, session):
26
- """
27
- fetching user's activity data
28
- :param dataframe:
29
- :return:
30
- """
31
-
32
- query = self.get_query()
33
-
34
- # Connect to Snowflake
35
- try:
36
- spark_df = session.sql(query).collect()
37
- dataframe = pd.DataFrame(spark_df)
38
- print(f"reading content table successfully")
39
- return dataframe
40
- except Exception as e:
41
- print(f"Error in reading table: {e}")
42
-
43
- def get_query(self):
44
-
45
- query = """
46
-
47
- """
48
-
49
-
50
-
51
-
52
- def snowflake_connection(self):
53
- """
54
- setting snowflake connection
55
- :return:
56
- """
57
-
58
- conn = {
59
- "user": os.getenv('snowflake_user'),
60
- "password": os.getenv('snowflake_password'),
61
- "account": os.getenv('snowflake_account'),
62
- "role": os.getenv('snowflake_role'),
63
- "database": os.getenv('snowflake_database'),
64
- "warehouse": os.getenv('snowflake_warehouse'),
65
- "schema": os.getenv('snowflake_schema'),
66
- }
67
-
68
- session = Session.builder.configs(conn).create()
69
- return session
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md DELETED
@@ -1,16 +0,0 @@
1
- ---
2
- title: AI Message Generator
3
- emoji: 🚀
4
- colorFrom: red
5
- colorTo: red
6
- sdk: docker
7
- app_port: 8501
8
- tags:
9
- - streamlit
10
- pinned: false
11
- short_description: 'UI for AI Messaging system '
12
- license: apache-2.0
13
- ---
14
-
15
- AI messaging system UI
16
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Singeo_camp.csv DELETED
The diff for this file is too large to render. See raw diff
 
ai_messaging_system_v2/Data/test_camp.json ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "campaign_view":"singeo_re_engagement",
3
+ "campaign_name": "musora-staff-test-campaign",
4
+ "brand": "singeo",
5
+ "1": {
6
+ "identifier_column": "email",
7
+ "stage": 1,
8
+ "segment_info": "Students who haven't practiced and logged into the app after at least 3 days.",
9
+ "recsys_contents": [
10
+ "workout",
11
+ "course",
12
+ "quick_tips"
13
+ ],
14
+ "involve_recsys_result": true,
15
+ "personalization": true,
16
+ "sample_examples": "Header: Your next lesson is waiting 👇 \n Message: Check it out now and improve your singing!",
17
+ "model": "gemini-2.5-flash-lite"
18
+ },
19
+ "2": {
20
+ "identifier_column": "email",
21
+ "stage": 2,
22
+ "segment_info": "Students who haven't practiced and logged into the app after at least 3 days.",
23
+ "recsys_contents": [
24
+ "workout",
25
+ "course",
26
+ "quick_tips"
27
+ ],
28
+ "involve_recsys_result": true,
29
+ "personalization": true,
30
+ "sample_examples": "Header: It’s a great day to sing 🤩,\n Message: It’s been a few days — warm up with a quick lesson!",
31
+ "model": "gemini-2.5-flash-lite"
32
+ },
33
+ "3": {
34
+ "identifier_column": "email",
35
+ "stage": 3,
36
+ "segment_info": "Students who haven't practiced and logged into the app after at least 3 days.",
37
+ "recsys_contents": [
38
+ "workout",
39
+ "course",
40
+ "quick_tips"
41
+ ],
42
+ "involve_recsys_result": true,
43
+ "personalization": true,
44
+ "sample_examples": "Header: Practice makes progress 💪, \nMessage: You don’t need to be perfect. But you do need practice to reach your goals!",
45
+ "model": "gemini-2.5-flash-lite"
46
+ },
47
+ "4": {
48
+ "identifier_column": "email",
49
+ "stage": 4,
50
+ "segment_info": "Students who haven't practiced and logged into the app after at least 3 days.",
51
+ "recsys_contents": [
52
+ "workout",
53
+ "course",
54
+ "quick_tips"
55
+ ],
56
+ "involve_recsys_result": true,
57
+ "personalization": true,
58
+ "sample_examples": "Header: Never stop learning, \nMessage: Take a lesson today and get back on track!",
59
+ "model": "gemini-2.5-flash-lite"
60
+ },
61
+ "5": {
62
+ "identifier_column": "email",
63
+ "stage": 5,
64
+ "segment_info": "Students who haven't practiced and logged into the app after at least 3 days.",
65
+ "recsys_contents": [
66
+ "workout",
67
+ "course",
68
+ "quick_tips"
69
+ ],
70
+ "involve_recsys_result": true,
71
+ "personalization": true,
72
+ "sample_examples": "Header: Get back on track ⏱️\nMessage: It’s been two weeks since your last practice session. Take a lesson today!",
73
+ "model": "gemini-2.5-flash-lite"
74
+ },
75
+ "6": {
76
+ "identifier_column": "email",
77
+ "stage": 6,
78
+ "segment_info": "Students who haven't practiced and logged into the app after at least 3 days.",
79
+ "recsys_contents": [
80
+ "workout",
81
+ "course",
82
+ "quick_tips"
83
+ ],
84
+ "involve_recsys_result": true,
85
+ "personalization": true,
86
+ "sample_examples": "Header: Keep on going!\nMessage: Get back to singing today. It only takes a few minutes!",
87
+ "model": "gemini-2.5-flash-lite"
88
+ },
89
+ "7": {
90
+ "identifier_column": "email",
91
+ "stage": 7,
92
+ "segment_info": "Students who haven't practiced and logged into the app after at least 3 days.",
93
+ "recsys_contents": [
94
+ "workout",
95
+ "course",
96
+ "quick_tips"
97
+ ],
98
+ "involve_recsys_result": true,
99
+ "personalization": true,
100
+ "sample_examples": "Header: Ready to sing? 🎤\nMessage: Let’s get started. Time for a quick practice session!",
101
+ "model": "gemini-2.5-flash-lite"
102
+ },
103
+ "8": {
104
+ "identifier_column": "email",
105
+ "stage": 8,
106
+ "segment_info": "Students who haven't practiced and logged into the app after at least 3 days.",
107
+ "recsys_contents": [
108
+ "workout",
109
+ "course",
110
+ "quick_tips"
111
+ ],
112
+ "involve_recsys_result": true,
113
+ "personalization": true,
114
+ "sample_examples": "Header: Your lesson’s waiting. 📥\nMessage: We want to hear you sing! Dive in today.",
115
+ "model": "gemini-2.5-flash-lite"
116
+ },
117
+ "9": {
118
+ "identifier_column": "email",
119
+ "stage": 9,
120
+ "segment_info": "Students who haven't practiced and logged into the app after at least 3 days.",
121
+ "recsys_contents": [
122
+ "workout",
123
+ "course",
124
+ "quick_tips"
125
+ ],
126
+ "involve_recsys_result": true,
127
+ "personalization": true,
128
+ "sample_examples": "Header: Time for a comeback!\nMessage: We haven’t seen you in 25 days. This will help get you back into the groove!",
129
+ "model": "gemini-2.5-flash-lite"
130
+ },
131
+ "10": {
132
+ "identifier_column": "email",
133
+ "stage": 10,
134
+ "segment_info": "Students who haven't practiced and logged into the app after at least 3 days.",
135
+ "recsys_contents": [
136
+ "workout",
137
+ "course",
138
+ "quick_tips"
139
+ ],
140
+ "involve_recsys_result": true,
141
+ "personalization": true,
142
+ "sample_examples": "Header: Have you been practicing?\nMessage: You have a lovely voice. We’d love to hear it again!",
143
+ "model": "gemini-2.5-flash-lite"
144
+ },
145
+ "11": {
146
+ "identifier_column": "email",
147
+ "stage": 11,
148
+ "segment_info": "Students who haven't practiced and logged into the app after at least 3 days.",
149
+ "recsys_contents": [
150
+ "workout",
151
+ "course",
152
+ "quick_tips"
153
+ ],
154
+ "involve_recsys_result": true,
155
+ "personalization": true,
156
+ "sample_examples": "Header: We Miss You 😔Message: All your lessons will just be here when you get back!",
157
+ "model": "gemini-2.5-flash-lite"
158
+ }
159
+ }
ai_messaging_system_v2/Data/test_staff.csv ADDED
ai_messaging_system_v2/Data/ui_output/.gitkeep ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # UI output directory
2
+ # This directory stores CSV output files when running in UI mode
3
+ # Files are cleared on each new UI run
ai_messaging_system_v2/Data/ui_output/message_cost.csv ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ brand,campaign_name,number_of_messages,model,stage,total_prompt_tokens,total_completion_tokens,total_cost,timestamp
2
+ drumeo,UI-Test-Campaign-Re-engagement,5,gpt-5-nano,1,4322,267,0.0003229,2026-01-12 01:39:36.723734+00:00
3
+ drumeo,UI-Test-Campaign-Re-engagement,5,gpt-5-nano,1,3862,238,0.0002883,2026-01-12 01:39:38.216117+00:00
4
+ drumeo,UI-Test-Campaign-Re-engagement,5,gemini-2.5-flash-lite,2,4467,208,0.0005298999999999999,2026-01-12 01:39:43.754321+00:00
5
+ drumeo,UI-Test-Campaign-Re-engagement,5,gpt-5-nano,2,4194,227,0.0003005,2026-01-12 01:39:45.335461+00:00
6
+ drumeo,UI-Test-Campaign-Re-engagement,5,gemini-2.5-flash-lite,3,4845,211,0.0005689000000000001,2026-01-12 01:39:49.489488+00:00
7
+ drumeo,UI-Test-Campaign-Re-engagement,5,gpt-5-nano,3,4379,230,0.00031095000000000005,2026-01-12 01:39:52.901870+00:00
ai_messaging_system_v2/Data/ui_output/messages_a_drumeo_20260111_2039.csv ADDED
The diff for this file is too large to render. See raw diff
 
ai_messaging_system_v2/Data/ui_output/messages_b_drumeo_20260111_2039.csv ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ user_id,email,first_name,birthday,birthday_reminder,user_info,instrument,platform,permission,expiration_date,recsys_result,message,brand,recommendation,recommendation_info,campaign_name,timestamp,stage
2
+ 876151,[email protected],,,,"Music Styles: Country, Funk, Hip-Hop/Rap, Jazz, Metal, Pop, Rock, Soul
3
+ Music Topics: Composition, Creativity, Electronic Drums, Feet, Fills, Grooves, Hands, Independence, Performance, Rudiments, Theory
4
+ Goals: Explore techniques, genres, and styles, Improve drumming technique, Learn as many songs as possible, Learn drumming theory, Stick to a consistent practice routine
5
+ ",Drum,push,plus,2026-01-28 17:27:30-08:00,https://www.musora.com/drumeo/lessons/recommended,"{""1"": {""header"": ""Time for your drill session 👇"", ""message"": ""Explore tailored grooves and rudiments to fit your style in Recommendations."", ""content_id"": null, ""web_url_path"": ""https://www.musora.com/drumeo/lessons/recommended"", ""title"": null, ""thumbnail_url"": null, ""deeplink"": null}}",drumeo,for_you,Redirecting user to their personalized Recommendations,UI-Test-Campaign-Re-engagement,2026-01-12 01:39:28.068450+00:00,1
6
+ 164216,[email protected],Carl,,,"Music Styles: Hip-Hop/Rap, Rock
7
+ Music Topics: Electronic Drums, Feet, Fills, Grooves, Hands
8
+ Goals: Improve drumming technique, Learn as many songs as possible, Stick to a consistent practice routine
9
+ ",Drum,push,plus,2026-01-20 12:04:08-08:00,https://www.musora.com/drumeo/lessons/recommended,"{""1"": {""header"": ""Carl, time to practice"", ""message"": ""Open your Recommendations: a structured drill set to sharpen hands, grooves, and feet."", ""content_id"": null, ""web_url_path"": ""https://www.musora.com/drumeo/lessons/recommended"", ""title"": null, ""thumbnail_url"": null, ""deeplink"": null}}",drumeo,for_you,Redirecting user to their personalized Recommendations,UI-Test-Campaign-Re-engagement,2026-01-12 01:39:28.068450+00:00,1
10
+ 223559,[email protected],Rene,1949-08-24,,"Music Styles: Blues, Country, Rock
11
+ Music Topics: Feet, Fills, Grooves, Hands
12
+ Goals: Improve drumming technique
13
+ ",Drum,push,plus,2026-03-06 12:03:54-08:00,https://www.musora.com/drumeo/lessons/recommended,"{""1"": {""header"": ""Rene, your drums crave grooves"", ""message"": ""Check your personalized recommendations and build hands, feet, and groove skills together."", ""content_id"": null, ""web_url_path"": ""https://www.musora.com/drumeo/lessons/recommended"", ""title"": null, ""thumbnail_url"": null, ""deeplink"": null}}",drumeo,for_you,Redirecting user to their personalized Recommendations,UI-Test-Campaign-Re-engagement,2026-01-12 01:39:28.068450+00:00,1
14
+ 881978,[email protected],,,,"Music Styles: Funk, Jazz, Rock
15
+ Music Topics: Feet, Grooves
16
+ Goals: Explore techniques, genres, and styles, Learn drumming theory
17
+ ",Drum,push,plus,2026-09-07 13:49:53-07:00,https://www.musora.com/drumeo/lessons/recommended,"{""1"": {""header"": ""Your drum groove awaits 👇"", ""message"": ""Jump back in with tailored picks: funk, jazz, and rock grooves plus feet technique. Your path continues here."", ""content_id"": null, ""web_url_path"": ""https://www.musora.com/drumeo/lessons/recommended"", ""title"": null, ""thumbnail_url"": null, ""deeplink"": null}}",drumeo,for_you,Redirecting user to their personalized Recommendations,UI-Test-Campaign-Re-engagement,2026-01-12 01:39:28.068450+00:00,1
18
+ 560107,[email protected],,,,"Music Styles: CCM/Worship, Country, Jazz, Pop, Rock
19
+ Music Topics: Creativity, Feet, Fills, Grooves, Hands, Performance
20
+ Goals: Stick to a consistent practice routine
21
+ ",Drum,push,plus,2026-02-25 00:26:29-08:00,https://www.musora.com/drumeo/lessons/recommended,"{""1"": {""header"": ""Back to the groove, drummer"", ""message"": ""Jump into your personalized recommendations and groove with hands, feet, and rhythm today."", ""content_id"": null, ""web_url_path"": ""https://www.musora.com/drumeo/lessons/recommended"", ""title"": null, ""thumbnail_url"": null, ""deeplink"": null}}",drumeo,for_you,Redirecting user to their personalized Recommendations,UI-Test-Campaign-Re-engagement,2026-01-12 01:39:28.068450+00:00,1
22
+ 876151,[email protected],,,,"Music Styles: Country, Funk, Hip-Hop/Rap, Jazz, Metal, Pop, Rock, Soul
23
+ Music Topics: Composition, Creativity, Electronic Drums, Feet, Fills, Grooves, Hands, Independence, Performance, Rudiments, Theory
24
+ Goals: Explore techniques, genres, and styles, Improve drumming technique, Learn as many songs as possible, Learn drumming theory, Stick to a consistent practice routine
25
+ ",Drum,push,plus,2026-01-28 17:27:30-08:00,https://www.musora.com/drumeo/lessons/recommended,"{""2"": {""header"": ""Drum path for you 🎯"", ""message"": ""Jump into tailored grooves and rudiments in Recommendations—your next step awaits."", ""content_id"": null, ""web_url_path"": ""https://www.musora.com/drumeo/lessons/recommended"", ""title"": null, ""thumbnail_url"": null, ""deeplink"": null}}",drumeo,for_you,Redirecting user to their personalized Recommendations,UI-Test-Campaign-Re-engagement,2026-01-12 01:39:39.017914+00:00,2
26
+ 164216,[email protected],Carl,,,"Music Styles: Hip-Hop/Rap, Rock
27
+ Music Topics: Electronic Drums, Feet, Fills, Grooves, Hands
28
+ Goals: Improve drumming technique, Learn as many songs as possible, Stick to a consistent practice routine
29
+ ",Drum,push,plus,2026-01-20 12:04:08-08:00,https://www.musora.com/drumeo/lessons/recommended,"{""2"": {""header"": ""Carl, keep the groove flowing"", ""message"": ""Check your Recommendations for a focused drill set on feet, hands, and grooves."", ""content_id"": null, ""web_url_path"": ""https://www.musora.com/drumeo/lessons/recommended"", ""title"": null, ""thumbnail_url"": null, ""deeplink"": null}}",drumeo,for_you,Redirecting user to their personalized Recommendations,UI-Test-Campaign-Re-engagement,2026-01-12 01:39:39.017914+00:00,2
30
+ 223559,[email protected],Rene,1949-08-24,,"Music Styles: Blues, Country, Rock
31
+ Music Topics: Feet, Fills, Grooves, Hands
32
+ Goals: Improve drumming technique
33
+ ",Drum,push,plus,2026-03-06 12:03:54-08:00,https://www.musora.com/drumeo/lessons/recommended,"{""2"": {""header"": ""Rene, tap for grooves ahead"", ""message"": ""Your personalized recommendations await—explore drills on feet, hands, and grooves to progress your technique."", ""content_id"": null, ""web_url_path"": ""https://www.musora.com/drumeo/lessons/recommended"", ""title"": null, ""thumbnail_url"": null, ""deeplink"": null}}",drumeo,for_you,Redirecting user to their personalized Recommendations,UI-Test-Campaign-Re-engagement,2026-01-12 01:39:39.017914+00:00,2
34
+ 881978,[email protected],,,,"Music Styles: Funk, Jazz, Rock
35
+ Music Topics: Feet, Grooves
36
+ Goals: Explore techniques, genres, and styles, Learn drumming theory
37
+ ",Drum,push,plus,2026-09-07 13:49:53-07:00,https://www.musora.com/drumeo/lessons/recommended,"{""2"": {""header"": ""Back to the groove 🎶"", ""message"": ""Your personalized picks await—explore funk, jazz, and rock focus with feet and groove work, from your path."", ""content_id"": null, ""web_url_path"": ""https://www.musora.com/drumeo/lessons/recommended"", ""title"": null, ""thumbnail_url"": null, ""deeplink"": null}}",drumeo,for_you,Redirecting user to their personalized Recommendations,UI-Test-Campaign-Re-engagement,2026-01-12 01:39:39.017914+00:00,2
38
+ 560107,[email protected],,,,"Music Styles: CCM/Worship, Country, Jazz, Pop, Rock
39
+ Music Topics: Creativity, Feet, Fills, Grooves, Hands, Performance
40
+ Goals: Stick to a consistent practice routine
41
+ ",Drum,push,plus,2026-02-25 00:26:29-08:00,https://www.musora.com/drumeo/lessons/recommended,"{""2"": {""header"": ""Hands and feet together"", ""message"": ""Open your personalized recommendations and shape a steady practice rhythm today."", ""content_id"": null, ""web_url_path"": ""https://www.musora.com/drumeo/lessons/recommended"", ""title"": null, ""thumbnail_url"": null, ""deeplink"": null}}",drumeo,for_you,Redirecting user to their personalized Recommendations,UI-Test-Campaign-Re-engagement,2026-01-12 01:39:39.017914+00:00,2
42
+ 876151,[email protected],,,,"Music Styles: Country, Funk, Hip-Hop/Rap, Jazz, Metal, Pop, Rock, Soul
43
+ Music Topics: Composition, Creativity, Electronic Drums, Feet, Fills, Grooves, Hands, Independence, Performance, Rudiments, Theory
44
+ Goals: Explore techniques, genres, and styles, Improve drumming technique, Learn as many songs as possible, Learn drumming theory, Stick to a consistent practice routine
45
+ ",Drum,push,plus,2026-01-28 17:27:30-08:00,https://www.musora.com/drumeo/lessons/recommended,"{""3"": {""header"": ""Your drum path awaits 🥁"", ""message"": ""Dive into tailored grooves and rudiments in Recommendations—crafted for your style and goals."", ""content_id"": null, ""web_url_path"": ""https://www.musora.com/drumeo/lessons/recommended"", ""title"": null, ""thumbnail_url"": null, ""deeplink"": null}}",drumeo,for_you,Redirecting user to their personalized Recommendations,UI-Test-Campaign-Re-engagement,2026-01-12 01:39:47.443090+00:00,3
46
+ 164216,[email protected],Carl,,,"Music Styles: Hip-Hop/Rap, Rock
47
+ Music Topics: Electronic Drums, Feet, Fills, Grooves, Hands
48
+ Goals: Improve drumming technique, Learn as many songs as possible, Stick to a consistent practice routine
49
+ ",Drum,push,plus,2026-01-20 12:04:08-08:00,https://www.musora.com/drumeo/lessons/recommended,"{""3"": {""header"": ""Carl, keep the groove moving"", ""message"": ""Open your Picks: a hand/feet focused drill set to refine your rhythm and fills."", ""content_id"": null, ""web_url_path"": ""https://www.musora.com/drumeo/lessons/recommended"", ""title"": null, ""thumbnail_url"": null, ""deeplink"": null}}",drumeo,for_you,Redirecting user to their personalized Recommendations,UI-Test-Campaign-Re-engagement,2026-01-12 01:39:47.443090+00:00,3
50
+ 223559,[email protected],Rene,1949-08-24,,"Music Styles: Blues, Country, Rock
51
+ Music Topics: Feet, Fills, Grooves, Hands
52
+ Goals: Improve drumming technique
53
+ ",Drum,push,plus,2026-03-06 12:03:54-08:00,https://www.musora.com/drumeo/lessons/recommended,"{""3"": {""header"": ""Rene, your groove awaits"", ""message"": ""Open your Recommendations to explore hands, feet, and groove drills tailored to blues, country, and rock."", ""content_id"": null, ""web_url_path"": ""https://www.musora.com/drumeo/lessons/recommended"", ""title"": null, ""thumbnail_url"": null, ""deeplink"": null}}",drumeo,for_you,Redirecting user to their personalized Recommendations,UI-Test-Campaign-Re-engagement,2026-01-12 01:39:47.443090+00:00,3
54
+ 881978,[email protected],,,,"Music Styles: Funk, Jazz, Rock
55
+ Music Topics: Feet, Grooves
56
+ Goals: Explore techniques, genres, and styles, Learn drumming theory
57
+ ",Drum,push,plus,2026-09-07 13:49:53-07:00,https://www.musora.com/drumeo/lessons/recommended,"{""3"": {""header"": ""Your groove guide awaits"", ""message"": ""Dive into tailored recommendations: funk, jazz, rock rhythms and feet work to expand your groove map."", ""content_id"": null, ""web_url_path"": ""https://www.musora.com/drumeo/lessons/recommended"", ""title"": null, ""thumbnail_url"": null, ""deeplink"": null}}",drumeo,for_you,Redirecting user to their personalized Recommendations,UI-Test-Campaign-Re-engagement,2026-01-12 01:39:47.443090+00:00,3
58
+ 560107,[email protected],,,,"Music Styles: CCM/Worship, Country, Jazz, Pop, Rock
59
+ Music Topics: Creativity, Feet, Fills, Grooves, Hands, Performance
60
+ Goals: Stick to a consistent practice routine
61
+ ",Drum,push,plus,2026-02-25 00:26:29-08:00,https://www.musora.com/drumeo/lessons/recommended,"{""3"": {""header"": ""Your drum groove awaits"", ""message"": ""Dive into personalized guidance and shape steady rhythm with hands, feet, and creativity today."", ""content_id"": null, ""web_url_path"": ""https://www.musora.com/drumeo/lessons/recommended"", ""title"": null, ""thumbnail_url"": null, ""deeplink"": null}}",drumeo,for_you,Redirecting user to their personalized Recommendations,UI-Test-Campaign-Re-engagement,2026-01-12 01:39:47.443090+00:00,3
{Messaging_system → ai_messaging_system_v2/Messaging_system}/CoreConfig.py RENAMED
@@ -3,7 +3,13 @@ the flow of the Program starts from create_personalized_message function
3
  """
4
  import os
5
  import time
6
- from Messaging_system.SnowFlakeConnection import SnowFlakeConn
 
 
 
 
 
 
7
 
8
 
9
  class CoreConfig:
@@ -22,11 +28,11 @@ class CoreConfig:
22
  # LLM configs
23
  self.api_key = None # will be set by user
24
  self.model = "gpt-4o" # default -> will be set by user
25
- self.temperature = 0.8
26
  self.reasoning_model=False
27
 
28
  # will be set by user
29
- self.personalization=False
30
  self.CTA = None
31
  self.message_style = None
32
  self.sample_example = None
@@ -40,6 +46,10 @@ class CoreConfig:
40
  self.consider_last_interaction = True
41
  self.additional_instructions = None
42
 
 
 
 
 
43
  # to trace the number of tokens and estimate the cost if needed
44
  self.temp_token_counter = 0
45
  self.total_tokens = {
@@ -51,7 +61,8 @@ class CoreConfig:
51
  self.recsys_result = None
52
  self.recsys_contents = ["song", "workout", "course", "quick_tips"]
53
  self.content_info = None
54
- self.involve_recsys_result = False
 
55
  self.popular_contents_df = None
56
 
57
  # Additional_info
@@ -64,14 +75,16 @@ class CoreConfig:
64
  self.wait_time = None
65
 
66
  # Instantiate the connection to Snowflake
67
- self.SF = SnowFlakeConn(session=self.session, brand=self.brand)
68
 
69
- # segment name
70
- self.segment_name = None
71
 
72
  # brand's voice language
73
  self.brand_voice = self.get_brand_voice()
74
 
 
 
75
  # ===============================================================
76
  def get_brand_voice(self):
77
  """
@@ -79,9 +92,11 @@ class CoreConfig:
79
  Returns:
80
  str: The brand voice phrases if file exists, otherwise None.
81
  """
82
- file_path = f'Config_files/{self.brand.lower()}_phrases.txt'
 
 
83
 
84
- if not os.path.exists(file_path):
85
  return None
86
 
87
  with open(file_path, 'r', encoding='utf-8') as f:
@@ -135,16 +150,6 @@ class CoreConfig:
135
  else:
136
  print(f"{messaging_mode} is not a valid messaging mode. available modes are: \n {valid_modes}")
137
 
138
- # --------------------------------------------------------------
139
- # --------------------------------------------------------------
140
- def set_openai_api(self, openai_key):
141
- """
142
- Setting openai key
143
- :param openai_key: a string with placeholders
144
- :return:
145
- """
146
- self.api_key = openai_key
147
-
148
  # --------------------------------------------------------------
149
  # --------------------------------------------------------------
150
  def set_number_of_samples(self, number_of_samples):
@@ -175,28 +180,6 @@ class CoreConfig:
175
  """
176
  self.segment_info = segment_info
177
 
178
-
179
- # --------------------------------------------------------------
180
- # --------------------------------------------------------------
181
- def set_number_of_messages(self, number_of_messages=1, instructionset=None, subsequent_examples=None):
182
- """
183
- If the number of messages is more than 1, we will set self.subsequence_messages to a dictionary where
184
- the key is an integer from 1 to number_of_messages, and the values are corresponding instructions in instructionset.
185
- :param number_of_messages: int
186
- :param instructionset: list of instructions
187
- :return:
188
- """
189
-
190
- if number_of_messages == 1:
191
- self.subsequence_messages = {1: None}
192
- else:
193
- if instructionset is not None:
194
- self.subsequence_messages = instructionset
195
- if subsequent_examples is not None:
196
- self.subsequent_examples = subsequent_examples
197
- else:
198
- raise ValueError("Instructionset must have instructions for each subsequence message")
199
-
200
  # --------------------------------------------------------------
201
  # --------------------------------------------------------------
202
 
@@ -243,7 +226,7 @@ class CoreConfig:
243
  delta = current_time - self.start_time
244
 
245
  # Check token limits
246
- if self.temp_token_counter > 3997000 and delta <= 60: # Using a safe margin
247
  print("Sleeping for few seconds to respect the token limit...")
248
  # reset the token counter
249
  self.temp_token_counter = 0
@@ -272,13 +255,13 @@ class CoreConfig:
272
 
273
  # --------------------------------------------------------------
274
  # --------------------------------------------------------------
275
- def set_segment_name(self, segment_name):
276
  """
277
  saving the current process
278
  :return:
279
  """
280
 
281
- self.segment_name = segment_name
282
  # ==============================================================
283
 
284
  def set_personalization(self):
 
3
  """
4
  import os
5
  import time
6
+ from pathlib import Path
7
+ try:
8
+ from ..database import DatabaseManager
9
+ except ImportError:
10
+ import sys
11
+ sys.path.append(str(Path(__file__).parent.parent))
12
+ from database import DatabaseManager
13
 
14
 
15
  class CoreConfig:
 
28
  # LLM configs
29
  self.api_key = None # will be set by user
30
  self.model = "gpt-4o" # default -> will be set by user
31
+ self.temperature = 0.7
32
  self.reasoning_model=False
33
 
34
  # will be set by user
35
+ self.personalization=True
36
  self.CTA = None
37
  self.message_style = None
38
  self.sample_example = None
 
46
  self.consider_last_interaction = True
47
  self.additional_instructions = None
48
 
49
+ # Campaign and per-message instructions (new feature)
50
+ self.campaign_instructions = None
51
+ self.per_message_instructions = None
52
+
53
  # to trace the number of tokens and estimate the cost if needed
54
  self.temp_token_counter = 0
55
  self.total_tokens = {
 
61
  self.recsys_result = None
62
  self.recsys_contents = ["song", "workout", "course", "quick_tips"]
63
  self.content_info = None
64
+ self.involve_recsys_result = True
65
+ self.specific_content_id = None # Force specific content for all users (overrides AI recommendation)
66
  self.popular_contents_df = None
67
 
68
  # Additional_info
 
75
  self.wait_time = None
76
 
77
  # Instantiate the connection to Snowflake
78
+ self.SF = DatabaseManager(session=self.session, brand=self.brand)
79
 
80
+ # campaign_name
81
+ self.campaign_name = None
82
 
83
  # brand's voice language
84
  self.brand_voice = self.get_brand_voice()
85
 
86
+ self.openai_fallback_enabled=False
87
+
88
  # ===============================================================
89
  def get_brand_voice(self):
90
  """
 
92
  Returns:
93
  str: The brand voice phrases if file exists, otherwise None.
94
  """
95
+ # Get the directory relative to the script location
96
+ script_dir = Path(__file__).parent.parent.resolve() # Go up one level from Messaging_system
97
+ file_path = script_dir / 'Config_files' / f'{self.brand.lower()}_phrases.txt'
98
 
99
+ if not file_path.exists():
100
  return None
101
 
102
  with open(file_path, 'r', encoding='utf-8') as f:
 
150
  else:
151
  print(f"{messaging_mode} is not a valid messaging mode. available modes are: \n {valid_modes}")
152
 
 
 
 
 
 
 
 
 
 
 
153
  # --------------------------------------------------------------
154
  # --------------------------------------------------------------
155
  def set_number_of_samples(self, number_of_samples):
 
180
  """
181
  self.segment_info = segment_info
182
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
  # --------------------------------------------------------------
184
  # --------------------------------------------------------------
185
 
 
226
  delta = current_time - self.start_time
227
 
228
  # Check token limits
229
+ if self.temp_token_counter > 3995000 and delta <= 60: # Using a safe margin
230
  print("Sleeping for few seconds to respect the token limit...")
231
  # reset the token counter
232
  self.temp_token_counter = 0
 
255
 
256
  # --------------------------------------------------------------
257
  # --------------------------------------------------------------
258
+ def set_segment_name(self, campaign_name):
259
  """
260
  saving the current process
261
  :return:
262
  """
263
 
264
+ self.campaign_name = campaign_name
265
  # ==============================================================
266
 
267
  def set_personalization(self):
{Messaging_system → ai_messaging_system_v2/Messaging_system}/DataCollector.py RENAMED
@@ -1,12 +1,18 @@
1
  """
2
  setting instructions and inputs required to generate personalized messages
3
  """
 
4
  import numpy as np
5
  import pandas as pd
6
-
 
7
 
8
  class DataCollector:
9
 
 
 
 
 
10
  def __init__(self, CoreConfig):
11
 
12
  self.Core = CoreConfig
@@ -22,8 +28,12 @@ class DataCollector:
22
  # extract user_ids and other data
23
  self.extract_musora_id()
24
 
 
 
 
 
25
  # selecting a sample of users
26
- self.select_sample()
27
 
28
  self.fetch_data()
29
 
@@ -50,15 +60,12 @@ class DataCollector:
50
  raise Exception("Input data must contain user_id, musora_user_id, id, or email column.")
51
 
52
  # Normalize the identification column to 'user_id'
53
- if id_col in ['musora_user_id', 'id']:
54
  self.Core.users_df.rename(columns={id_col: 'user_id'}, inplace=True)
 
55
  elif id_col == 'email':
56
  self._lookup_user_ids_from_email()
57
 
58
- # Identify additional columns: exclude identification columns
59
- identification_columns = {'user_id', 'email'} if 'email' in self.Core.users_df.columns else {'user_id'}
60
- additional_columns = [col for col in self.Core.users_df.columns if col not in identification_columns]
61
- self.Core.additional_info_columns = [col.lower() for col in additional_columns]
62
 
63
  # -----------------------------------------------------------------
64
  # -----------------------------------------------------------------
@@ -69,29 +76,61 @@ class DataCollector:
69
  """
70
  unique_emails = self.Core.users_df["email"].unique()
71
  data = self.Core.SF.extract_id_from_email(emails=unique_emails)
72
- self.Core.users_df = pd.merge(self.Core.users_df, data, on='email', how='left')
73
-
 
 
 
 
 
 
 
 
 
 
74
  # -----------------------------------------------------------------
75
  # -----------------------------------------------------------------
76
  def remaining_days_to_birthday(self):
77
  """
78
- calculating the remaining days to the user's birthday
79
- :return: updating users_df
 
80
  """
 
 
81
 
82
- # Iterate through each row in the DataFrame
83
  for idx, row in self.Core.users_df.iterrows():
84
  if pd.notna(row.get("birthday")):
85
- if int(row["birthday_reminder"]) <= 7:
86
- remaining_days = int(row["birthday_reminder"])
87
- self.Core.users_df.at[idx, "birthday_reminder"] = f"{remaining_days} days until student's birthday"
88
- else:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  self.Core.users_df.at[idx, "birthday_reminder"] = None
90
 
91
  # -----------------------------------------------------------------
92
  # -----------------------------------------------------------------
93
  def fetch_data(self):
94
-
 
 
 
95
  # Fetch datasets
96
  user_ids = self.Core.users_df["user_id"].unique()
97
  users_data = self.Core.SF.get_data("users", user_ids)
@@ -115,7 +154,7 @@ class DataCollector:
115
  self.Core.users_df[col] = self.Core.users_df[col].replace(['', 'None', 'nan'], np.nan)
116
 
117
  # Now drop rows where 'permission' is missing
118
- self.Core.users_df.dropna(subset=["permission"], inplace=True)
119
  self.Core.users_df = self.Core.users_df.drop_duplicates(subset=['user_id'])
120
 
121
  self.Core.content_info = contents_data
@@ -137,29 +176,12 @@ class DataCollector:
137
  self.Core.users_df["prompt"] = None # will contain final prompt
138
  self.Core.users_df["instrument"] = self.Core.get_instrument()
139
  self.Core.users_df["platform"] = self.Core.platform
140
- self.Core.users_df["segment_name"] = self.Core.segment_name
141
-
142
- # -------------------------------------------------------------
143
- # -------------------------------------------------------------
144
- # def create_additional_information(self):
145
- # """
146
- # providing additional input and instructions based on available columns in the input file
147
- # :return: instructions
148
- # """
149
- # self.Core.users_df["additional_info"] = None
150
- #
151
- # # Iterate through each row in the DataFrame
152
- # for idx, row in self.Core.users_df.iterrows():
153
- # additional_info = []
154
- #
155
- # # populating additional_info
156
- # for feature in self.Core.additional_info_columns:
157
- # value = row.get(feature)
158
- # if pd.notna(value) and value not in [None, [], {}] and (
159
- # not isinstance(value, str) or value.strip()):
160
- # additional_info.append(f"{feature}: {str(value)}")
161
- #
162
- # self.Core.users_df.at[idx, "additional_info"] = "\n".join(additional_info)
163
 
164
  # -----------------------------------------------------------------
165
  # -----------------------------------------------------------------
@@ -171,9 +193,208 @@ class DataCollector:
171
  """
172
 
173
  # Use self.number_of_samples if sample_size is None, otherwise default to 20
174
- if sample_size is None:
175
- sample_size = self.Core.number_of_samples if self.Core.number_of_samples is not None else 20
 
 
176
 
177
  total_users = self.Core.users_df.shape[0]
178
  sample_size = min(total_users, sample_size)
179
  self.Core.users_df = self.Core.users_df.sample(n=sample_size, replace=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
  setting instructions and inputs required to generate personalized messages
3
  """
4
+ import json
5
  import numpy as np
6
  import pandas as pd
7
+ from datetime import datetime, timezone
8
+ from pathlib import Path
9
 
10
  class DataCollector:
11
 
12
+ # UI mode constants (same as Permes class)
13
+ UI_OUTPUT_DIR = Path(__file__).parent.parent / "Data" / "ui_output"
14
+ UI_OUTPUT_FILE = "messages.csv"
15
+
16
  def __init__(self, CoreConfig):
17
 
18
  self.Core = CoreConfig
 
28
  # extract user_ids and other data
29
  self.extract_musora_id()
30
 
31
+ if len(self.Core.users_df)==0:
32
+ # No valid user exist --> users don't have valid permission
33
+ return self.Core
34
+
35
  # selecting a sample of users
36
+ # self.select_sample()
37
 
38
  self.fetch_data()
39
 
 
60
  raise Exception("Input data must contain user_id, musora_user_id, id, or email column.")
61
 
62
  # Normalize the identification column to 'user_id'
63
+ if id_col in ['musora_user_id', 'id', 'user_id']:
64
  self.Core.users_df.rename(columns={id_col: 'user_id'}, inplace=True)
65
+ self._lookup_permissions()
66
  elif id_col == 'email':
67
  self._lookup_user_ids_from_email()
68
 
 
 
 
 
69
 
70
  # -----------------------------------------------------------------
71
  # -----------------------------------------------------------------
 
76
  """
77
  unique_emails = self.Core.users_df["email"].unique()
78
  data = self.Core.SF.extract_id_from_email(emails=unique_emails)
79
+ # self.Core.users_df = pd.merge(self.Core.users_df, data, on='email', how='left')
80
+ self.Core.users_df = data
81
+ # =================================================================
82
+ def _lookup_permissions(self):
83
+ """
84
+ Looks up emails and permissions based on unique user_ids and merges the results
85
+ into self.users_df. Assumes self.users_df contains an 'user_id' column.
86
+ """
87
+ unique_ids = self.Core.users_df["user_id"].unique()
88
+ data = self.Core.SF.extract_email_from_id(unique_ids=unique_ids)
89
+ # self.Core.users_df = pd.merge(self.Core.users_df, data, on='email', how='left')
90
+ self.Core.users_df = data
91
  # -----------------------------------------------------------------
92
  # -----------------------------------------------------------------
93
  def remaining_days_to_birthday(self):
94
  """
95
+ Calculate the remaining days to each user's birthday.
96
+ Only store the number if it's less than or equal to 6 days,
97
+ otherwise store None.
98
  """
99
+ today = datetime.now().date()
100
+ self.Core.users_df["birthday_reminder"] = None
101
 
 
102
  for idx, row in self.Core.users_df.iterrows():
103
  if pd.notna(row.get("birthday")):
104
+ try:
105
+ # Parse the birthday value (Snowflake datetime format)
106
+ bday = pd.to_datetime(row["birthday"]).date()
107
+
108
+ # Replace year with current year
109
+ next_bday = bday.replace(year=today.year)
110
+
111
+ # If birthday already passed this year, use next year
112
+ if next_bday < today:
113
+ next_bday = next_bday.replace(year=today.year + 1)
114
+
115
+ # Days until birthday
116
+ remaining_days = (next_bday - today).days
117
+
118
+ # Save only if within 6 days
119
+ if remaining_days <= 6:
120
+ self.Core.users_df.at[idx, "birthday_reminder"] = remaining_days
121
+ else:
122
+ self.Core.users_df.at[idx, "birthday_reminder"] = None
123
+ except Exception as e:
124
+ # Handle invalid date formats gracefully
125
  self.Core.users_df.at[idx, "birthday_reminder"] = None
126
 
127
  # -----------------------------------------------------------------
128
  # -----------------------------------------------------------------
129
  def fetch_data(self):
130
+ """
131
+ Fetch all required data for the process: Users, contents, interaction_data, recsys data, popular contents
132
+ :return:
133
+ """
134
  # Fetch datasets
135
  user_ids = self.Core.users_df["user_id"].unique()
136
  users_data = self.Core.SF.get_data("users", user_ids)
 
154
  self.Core.users_df[col] = self.Core.users_df[col].replace(['', 'None', 'nan'], np.nan)
155
 
156
  # Now drop rows where 'permission' is missing
157
+ # self.Core.users_df.dropna(subset=["permission"], inplace=True)
158
  self.Core.users_df = self.Core.users_df.drop_duplicates(subset=['user_id'])
159
 
160
  self.Core.content_info = contents_data
 
176
  self.Core.users_df["prompt"] = None # will contain final prompt
177
  self.Core.users_df["instrument"] = self.Core.get_instrument()
178
  self.Core.users_df["platform"] = self.Core.platform
179
+ self.Core.users_df["campaign_name"] = self.Core.campaign_name
180
+ # creating timestamp
181
+ now_utc = datetime.now(timezone.utc)
182
+ self.Core.users_df["timestamp"] = now_utc
183
+ self.Core.users_df["brand"] = str(self.Core.brand).lower()
184
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
 
186
  # -----------------------------------------------------------------
187
  # -----------------------------------------------------------------
 
193
  """
194
 
195
  # Use self.number_of_samples if sample_size is None, otherwise default to 20
196
+ if sample_size is None or sample_size==0:
197
+ # set it to be 20 by default
198
+ # sample_size = self.Core.number_of_samples if self.Core.number_of_samples is not None else 20
199
+ sample_size = 500
200
 
201
  total_users = self.Core.users_df.shape[0]
202
  sample_size = min(total_users, sample_size)
203
  self.Core.users_df = self.Core.users_df.sample(n=sample_size, replace=False)
204
+
205
+ def fetch_log_data(self, stage, test_mode, mode="production", ui_experiment_id=None):
206
+ """
207
+ Fetch data for users that we have already generated messages for them.
208
+
209
+ Args:
210
+ stage: Current stage number
211
+ test_mode: Boolean, if True ignores cooldown period
212
+ mode: str, operating mode - "production", "test", or "ui"
213
+ ui_experiment_id: Optional experiment ID for UI mode (e.g., 'messages_a_drumeo_20260111_1756')
214
+
215
+ Returns:
216
+ CoreConfig or None
217
+ """
218
+
219
+ self.extract_musora_id()
220
+
221
+ user_ids = self.Core.users_df["user_id"].unique()
222
+ if len(user_ids)==0:
223
+ print("No users found")
224
+ return None
225
+
226
+ else:
227
+ # UI mode: Read from local CSV
228
+ if mode == "ui":
229
+ users_data = self._read_log_data_from_csv(user_ids, stage, self.Core.campaign_name, ui_experiment_id)
230
+ else:
231
+ # Production/Test mode: Read from Snowflake
232
+ users_data = self.Core.SF.get_log_data(user_ids, stage, self.Core.campaign_name, test_mode)
233
+ if len(users_data) == 0:
234
+ print("No users found")
235
+ return None
236
+ else:
237
+ users_data = self._prepare_log_data(users_data)
238
+
239
+ contents_data = self.Core.SF.get_data("contents")
240
+ popular_contents_data = self.Core.SF.get_data("popular_contents")
241
+
242
+ self.Core.users_df["user_id"] = self.Core.users_df["user_id"].astype(int)
243
+
244
+ # Merge additional user details into the base dataframe (self.users_df)
245
+ # Assuming self.users_df already exists and contains a "USER_ID" column
246
+ self.Core.users_df = self.Core.users_df.merge(users_data, on="user_id", how="left", suffixes=("", "_users"))
247
+
248
+ for col in self.Core.users_df.columns:
249
+ # Replace additional empty representations with np.nan
250
+ self.Core.users_df[col] = self.Core.users_df[col].replace(['', 'None', 'nan'], np.nan)
251
+
252
+ self.remaining_days_to_birthday()
253
+ self.Core.content_info = contents_data
254
+ self.Core.popular_contents_df = popular_contents_data
255
+
256
+ now_utc = datetime.now(timezone.utc)
257
+ self.Core.users_df["timestamp"] = now_utc
258
+ self.Core.users_df["brand"] = str(self.Core.brand).lower()
259
+
260
+ return self.Core
261
+
262
+ def _prepare_log_data(self, users_df):
263
+
264
+ df = users_df.copy()
265
+
266
+ # Make sure stage is int (Snowflake can return numpy types)
267
+ df["stage"] = df["stage"].astype(int)
268
+ df[["_header", "_body"]] = df.apply(self._extract_header_body, axis=1)
269
+
270
+ prev_text = (
271
+ df.groupby("user_id", as_index=False)
272
+ .apply(lambda g: pd.Series({"previous_messages": self._build_previous_messages(g)}))
273
+ )
274
+
275
+ # --- pick the single most recent record per user (i.e., highest stage in df == stage-1) ---
276
+ most_recent_per_user = (
277
+ df.sort_values(["user_id", "stage"], ascending=[True, False])
278
+ .groupby("user_id", as_index=False)
279
+ .head(1)
280
+ .drop(columns=["_header", "_body"])
281
+ )
282
+
283
+ # --- final user_df: one row per user + previous_messages column ---
284
+ users_df = most_recent_per_user.merge(prev_text, on="user_id", how="left")
285
+ return users_df
286
+
287
+ # --- extract header/body from MESSAGE JSON for each row ---
288
+ def _extract_header_body(self, row):
289
+ raw = row.get("message")
290
+ try:
291
+ payload = json.loads(raw) if isinstance(raw, str) else (raw or {})
292
+ except Exception:
293
+ payload = {}
294
+
295
+ # prefer the key equal to this row's stage, else fall back to first dict value
296
+ stage_key = str(row["stage"])
297
+ node = payload.get(stage_key)
298
+ if not isinstance(node, dict) and isinstance(payload, dict) and payload:
299
+ node = next((v for v in payload.values() if isinstance(v, dict)), {})
300
+
301
+ header = (node.get("header") if isinstance(node, dict) else "") or ""
302
+ body = (node.get("message") if isinstance(node, dict) else "") or ""
303
+ return pd.Series({"_header": header.strip(), "_body": body.strip()})
304
+
305
+ def _build_previous_messages(self, g: pd.DataFrame) -> str:
306
+ # --- build previous_messages (merge last up-to-3 messages into one string) ---
307
+ g = g.sort_values("stage") # chronological (oldest -> newest)
308
+ msgs = [{"header": h, "message": b} for h, b in zip(g["_header"], g["_body"])]
309
+ recent = msgs[-3:] # up to 3 most recent
310
+ parts = []
311
+ for i, m in enumerate(recent, start=1):
312
+ header = (m.get("header") or "").strip()
313
+ body = (m.get("message") or "").strip()
314
+ parts.append(f"Message {i}: (header) {header}\n (message) {body}")
315
+ return "\n\n".join(parts)
316
+
317
+ # ======================= UI MODE HELPER FUNCTIONS =======================
318
+
319
+ def _read_log_data_from_csv(self, user_ids, stage, campaign_name, ui_experiment_id=None):
320
+ """
321
+ Read historical message data from local CSV file in UI mode.
322
+
323
+ This function reads from the single CSV file that contains all previous stages,
324
+ similar to reading from Snowflake in production mode.
325
+
326
+ IMPORTANT: Deduplicates by (user_id, stage) to match Snowflake behavior,
327
+ keeping only the most recent record for each (user_id, stage) pair.
328
+
329
+ Args:
330
+ user_ids: Array of user IDs to filter
331
+ stage: Current stage number
332
+ campaign_name: Campaign name
333
+ ui_experiment_id: Optional experiment ID for UI mode (e.g., 'messages_a_drumeo_20260111_1756')
334
+
335
+ Returns:
336
+ pd.DataFrame: Historical message data for previous stages (deduplicated)
337
+ """
338
+ # Use experiment ID if provided (for AB testing), otherwise use default filename
339
+ if ui_experiment_id:
340
+ messages_file = self.UI_OUTPUT_DIR / f"{ui_experiment_id}.csv"
341
+ else:
342
+ messages_file = self.UI_OUTPUT_DIR / self.UI_OUTPUT_FILE
343
+
344
+ # Check if file exists
345
+ if not messages_file.exists():
346
+ print(f"⚠️ UI Mode: No previous message data found at {messages_file}")
347
+ print(f" This is expected for stage 1 or first run.")
348
+ return pd.DataFrame()
349
+
350
+ try:
351
+ # Read CSV with UTF-8-SIG encoding to support emojis and handle BOM
352
+ all_messages = pd.read_csv(messages_file, encoding='utf-8-sig')
353
+
354
+ # Normalize column names to lowercase
355
+ all_messages.columns = all_messages.columns.str.lower()
356
+
357
+ # Filter for:
358
+ # 1. Matching user IDs
359
+ # 2. Matching campaign name
360
+ # 3. Previous stages only (stage < current stage)
361
+ filtered_messages = all_messages[
362
+ (all_messages['user_id'].isin(user_ids)) &
363
+ (all_messages['campaign_name'] == campaign_name) &
364
+ (all_messages['stage'] < stage)
365
+ ]
366
+
367
+ if len(filtered_messages) == 0:
368
+ print(f"ℹ️ UI Mode: No previous messages found for {len(user_ids)} users")
369
+ return pd.DataFrame()
370
+
371
+ # CRITICAL: Deduplicate by (user_id, stage) to match Snowflake behavior
372
+ # Keep only the most recent record per (user_id, stage) based on timestamp
373
+ # This prevents duplicate rows from being processed in follow-up stages
374
+ if 'timestamp' in filtered_messages.columns:
375
+ # Convert timestamp to datetime for proper sorting
376
+ filtered_messages['timestamp'] = pd.to_datetime(filtered_messages['timestamp'])
377
+
378
+ # Sort by timestamp descending and keep first (most recent) per (user_id, stage)
379
+ filtered_messages = (
380
+ filtered_messages
381
+ .sort_values('timestamp', ascending=False)
382
+ .groupby(['user_id', 'stage'], as_index=False)
383
+ .first()
384
+ )
385
+
386
+ total_before_dedup = len(all_messages[(all_messages['user_id'].isin(user_ids)) &
387
+ (all_messages['campaign_name'] == campaign_name) &
388
+ (all_messages['stage'] < stage)])
389
+ deduped_count = len(filtered_messages)
390
+
391
+ print(f"✅ UI Mode: Loaded {deduped_count} previous messages from {messages_file}")
392
+ if total_before_dedup > deduped_count:
393
+ print(f" (Deduplicated {total_before_dedup - deduped_count} duplicate records)")
394
+
395
+ return filtered_messages
396
+
397
+ except Exception as e:
398
+ print(f"❌ Error reading UI mode data from {messages_file}: {str(e)}")
399
+ return pd.DataFrame()
400
+
{Messaging_system → ai_messaging_system_v2/Messaging_system}/Homepage_Recommender.py RENAMED
File without changes
{Messaging_system → ai_messaging_system_v2/Messaging_system}/LLM.py RENAMED
@@ -3,16 +3,14 @@ This class contains multiple LLMs and handles LLMs response
3
  """
4
 
5
  import json
6
- import time
7
  from openai import OpenAI
8
  import openai
9
- import torch
10
  import re
11
- import anthropic
12
  import os
13
- import streamlit as st
14
  from google.genai import types
15
  from google import genai
 
 
16
 
17
 
18
 
@@ -20,28 +18,43 @@ class LLM:
20
  def __init__(self, Core):
21
  self.Core = Core
22
  self.model = None
23
- self.model_type = "openai" # valid values -> ["openai", "ollama"]
24
  self.client = None
 
 
 
 
 
 
 
 
25
  self.connect_to_llm()
26
 
27
  def get_credential(self, key):
28
- return os.getenv(key) or st.secrets.get(key)
29
-
30
- def get_response(self, prompt, instructions):
31
- if self.model_type == "openai":
32
- response = self.get_message_openai(prompt, instructions)
33
- # elif self.model_type == "ollama":
34
- # response = self.get_message_ollama(prompt, instructions)
35
- elif self.model_type == "inference":
36
- response = self.get_message_inference(prompt, instructions)
37
- elif self.model_type == "claude":
38
- response = self.get_message_claude(prompt, instructions)
39
- elif self.model_type == "google":
40
- response = self.get_message_google(prompt, instructions)
41
- else:
42
- raise f"Invalid model type : {self.model_type}"
43
 
44
- return response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
  def connect_to_llm(self):
47
  """
@@ -52,118 +65,61 @@ class LLM:
52
  if self.Core.model in self.Core.config_file["openai_models"]:
53
  self.model_type = "openai"
54
 
55
- elif self.Core.model in self.Core.config_file["inference_models"]:
56
- self.model_type = "inference"
57
-
58
  elif self.Core.model in self.Core.config_file["google_models"]:
59
  self.model_type = "google"
60
 
61
- # elif self.Core.model in self.Core.config_file["ollama_models"]:
62
- # self.model_type = "ollama"
63
- # self.client = ollama.Client()
64
-
65
- elif self.Core.model in self.Core.config_file["claude_models"]:
66
- self.model_type = "claude"
67
- self.client = anthropic.Anthropic(
68
- api_key=self.get_credential('claude_api_key'),
69
- )
70
-
71
  self.model = self.Core.model
72
 
73
- # ==============================================================
74
- def get_message_inference(self, prompt, instructions, max_retries=4):
75
- """
76
- sending the prompt to openai LLM and get back the response
77
  """
 
78
 
79
- api_key = self.get_credential('inference_api_key')
80
- client = OpenAI(
81
- base_url="https://api.inference.net/v1",
82
- api_key=api_key,
83
- )
84
-
85
- for attempt in range(max_retries):
86
- try:
87
- if self.Core.reasoning_model:
88
- response = client.chat.completions.create(
89
- model=self.Core.model,
90
- response_format={"type": "json_object"},
91
- messages=[
92
- {"role": "system", "content": instructions},
93
- {"role": "user", "content": prompt}
94
- ],
95
- reasoning_effort="low",
96
- n=1,
97
- )
98
-
99
- else:
100
- response = client.chat.completions.create(
101
- model=self.Core.model,
102
- response_format={"type": "json_object"},
103
- messages=[
104
- {"role": "system", "content": instructions},
105
- {"role": "user", "content": prompt}
106
- ],
107
- n=1,
108
- temperature=self.Core.temperature
109
- )
110
-
111
- tokens = {
112
- 'prompt_tokens': response.usage.prompt_tokens,
113
- 'completion_tokens': response.usage.completion_tokens,
114
- 'total_tokens': response.usage.total_tokens
115
- }
116
-
117
- # validating the JSON
118
- self.Core.total_tokens['prompt_tokens'] += tokens['prompt_tokens']
119
- self.Core.total_tokens['completion_tokens'] += tokens['completion_tokens']
120
- self.Core.temp_token_counter += tokens['total_tokens']
121
-
122
- try:
123
- content = response.choices[0].message.content
124
 
125
- # Extract JSON code block
 
 
 
 
 
 
126
 
127
- output = json.loads(content)
 
 
 
128
 
129
- if 'message' not in output or 'header' not in output:
130
- print(f"'message' or 'header' is missing in response on attempt {attempt + 1}. Retrying...")
131
- continue # Continue to next attempt
132
 
133
- else:
134
- if len(output["header"].strip()) > self.Core.config_file["header_limit"] or len(
135
- output["message"].strip()) > self.Core.config_file["message_limit"]:
136
- print(
137
- f"'header' or 'message' is more than specified characters in response on attempt {attempt + 1}. Retrying...")
138
- continue
139
 
140
- return output
 
 
 
 
141
 
142
- except json.JSONDecodeError:
143
- print(f"Invalid JSON from LLM on attempt {attempt + 1}. Retrying...")
 
144
 
145
- except openai.APIConnectionError as e:
146
- print("The server could not be reached")
147
- print(e.__cause__) # an underlying Exception, likely raised within httpx.
148
- except openai.RateLimitError as e:
149
- print("A 429 status code was received; we should back off a bit.")
150
- except openai.APIStatusError as e:
151
- print("Another non-200-range status code was received")
152
- print(e.status_code)
153
- print(e.response)
154
-
155
- print("Max retries exceeded. Returning empty response.")
156
- return None
157
 
158
  # =========================================================================
159
- def get_message_google(self, prompt, instructions, max_retries=4):
160
 
161
- client = genai.Client(api_key=self.get_credential("Google_API"))
 
162
 
163
  for attempt in range(max_retries):
164
  try:
165
  response = client.models.generate_content(
166
- model=self.Core.model,
167
  contents=prompt,
168
  config=types.GenerateContentConfig(
169
  thinking_config=types.ThinkingConfig(thinking_budget=0),
@@ -172,34 +128,26 @@ class LLM:
172
  response_mime_type="application/json"
173
  ))
174
 
 
175
  tokens = {
176
  'prompt_tokens': response.usage_metadata.prompt_token_count,
177
  'completion_tokens': response.usage_metadata.candidates_token_count,
178
  'total_tokens': response.usage_metadata.total_token_count
179
  }
180
 
181
- # validating the JSON
182
  self.Core.total_tokens['prompt_tokens'] += tokens['prompt_tokens']
183
  self.Core.total_tokens['completion_tokens'] += tokens['completion_tokens']
184
- self.Core.temp_token_counter += tokens['total_tokens']
185
 
186
  output = self.preprocess_and_parse_json(response.text)
187
- # output = json.loads(str(response.text))
188
 
189
- if 'message' not in output or 'header' not in output:
190
- print(f"'message' or 'header' is missing in response on attempt {attempt + 1}. Retrying...")
191
- continue # Continue to next attempt
 
 
192
 
193
- else:
194
- if len(output["header"].strip()) > self.Core.config_file["header_limit"] or len(
195
- output["message"].strip()) > self.Core.config_file["message_limit"]:
196
- print(
197
- f"'header' or 'message' is more than specified characters in response on attempt {attempt + 1}. Retrying...")
198
- headchar= len(output["header"].strip())
199
- messagechar = len(output["message"].strip())
200
- print(
201
- f"'header' has {headchar} chars and 'message' has {messagechar} ...")
202
- continue
203
  return output
204
 
205
  except json.JSONDecodeError:
@@ -212,13 +160,13 @@ class LLM:
212
 
213
  # =========================================================================
214
 
215
- def get_message_openai(self, prompt, instructions, max_retries=4):
216
  """
217
  sending the prompt to openai LLM and get back the response
218
  """
219
 
220
- openai.api_key = self.Core.api_key
221
- client = OpenAI(api_key=self.Core.api_key)
222
 
223
  for attempt in range(max_retries):
224
  try:
@@ -252,29 +200,24 @@ class LLM:
252
  'total_tokens': response.usage.total_tokens
253
  }
254
 
255
- # validating the JSON
256
- self.Core.total_tokens['prompt_tokens'] += tokens['prompt_tokens']
257
- self.Core.total_tokens['completion_tokens'] += tokens['completion_tokens']
258
- self.Core.temp_token_counter += tokens['total_tokens']
259
 
260
  try:
261
  content = response.choices[0].message.content
262
 
263
  # Extract JSON code block
264
-
265
  output = json.loads(content)
266
 
267
- if 'message' not in output or 'header' not in output:
268
- print(f"'message' or 'header' is missing in response on attempt {attempt + 1}. Retrying...")
269
- continue # Continue to next attempt
270
-
271
- else:
272
- if len(output["header"].strip()) > self.Core.config_file["header_limit"] or len(
273
- output["message"].strip()) > self.Core.config_file["message_limit"]:
274
- print(
275
- f"'header' or 'message' is more than specified characters in response on attempt {attempt + 1}. Retrying...")
276
- continue
277
 
 
 
 
 
278
  return output
279
 
280
  except json.JSONDecodeError:
@@ -293,130 +236,14 @@ class LLM:
293
  print("Max retries exceeded. Returning empty response.")
294
  return None
295
 
296
- # ======================================================================
297
-
298
- def get_message_ollama(self, prompt, instructions, max_retries=10):
299
- """
300
- Send the prompt to the LLM and get back the response.
301
- Includes handling for GPU memory issues by clearing cache and waiting before retry.
302
- """
303
- prompt = instructions + "\n \n" + prompt
304
- for attempt in range(max_retries):
305
- try:
306
- # Try generating the response
307
- response = self.client.generate(model=self.model, prompt=prompt)
308
- except Exception as e:
309
- # This catches errors like the connection being forcibly closed
310
- print(f"Error on attempt {attempt + 1}: {e}.")
311
- try:
312
- # Clear GPU cache if you're using PyTorch; this may help free up memory
313
- torch.cuda.empty_cache()
314
- print("Cleared GPU cache.")
315
- except Exception as cache_err:
316
- print("Failed to clear GPU cache:", cache_err)
317
- # Wait a bit before retrying to allow memory to recover
318
- time.sleep(2)
319
- continue
320
-
321
- try:
322
- tokens = {
323
- 'prompt_tokens': 0,
324
- 'completion_tokens': 0,
325
- 'total_tokens': 0
326
- }
327
-
328
- try:
329
- output = self.preprocess_and_parse_json(response.response)
330
- if output is None:
331
- continue
332
-
333
- if 'message' not in output or 'header' not in output:
334
- print(f"'message' or 'header' is missing in response on attempt {attempt + 1}. Retrying...")
335
- continue # Continue to next attempt
336
-
337
- else:
338
- if len(output["header"].strip()) > self.Core.config_file["header_limit"] or len(
339
- output["message"].strip()) > self.Core.config_file["message_limit"]:
340
- print(
341
- f"'header' or 'message' is more than specified characters in response on attempt {attempt + 1}. Retrying...")
342
- continue
343
- else:
344
- return output
345
-
346
- except json.JSONDecodeError:
347
- print(f"Invalid JSON from LLM on attempt {attempt + 1}. Retrying...")
348
- except Exception as parse_error:
349
- print("Error processing output:", parse_error)
350
-
351
- print("Max retries exceeded. Returning empty response.")
352
- return None
353
-
354
- def get_message_claude(self, prompt, instructions, max_retries=4):
355
- """
356
- send prompt to claude LLM and get back the response
357
- :param prompt:
358
- :param instructions:
359
- :return:
360
- """
361
-
362
-
363
- for attempt in range(max_retries):
364
- try:
365
-
366
- message = self.client.messages.create(
367
- model=self.model,
368
- max_tokens=4096,
369
- system = instructions,
370
- messages=[
371
- {"role": "user", "content": prompt + "\nHere is the JSON requested:\n"}
372
- ],
373
- temperature=self.Core.temperature
374
- )
375
- # Try generating the response
376
- response = message.content[0].text
377
-
378
- tokens = {
379
- 'prompt_tokens': message.usage.input_tokens,
380
- 'completion_tokens': message.usage.output_tokens,
381
- 'total_tokens': message.usage.output_tokens + message.usage.input_tokens
382
- }
383
-
384
- self.Core.total_tokens['prompt_tokens'] += tokens['prompt_tokens']
385
- self.Core.total_tokens['completion_tokens'] += tokens['completion_tokens']
386
- self.Core.temp_token_counter += tokens['total_tokens']
387
-
388
- try:
389
- output = self.preprocess_and_parse_json_claude(response)
390
- if output is None:
391
- continue
392
-
393
- if 'message' not in output or 'header' not in output:
394
- print(f"'message' or 'header' is missing in response on attempt {attempt + 1}. Retrying...")
395
- continue # Continue to next attempt
396
-
397
- else:
398
- if len(output["header"].strip()) > self.Core.config_file["header_limit"] or len(
399
- output["message"].strip()) > self.Core.config_file["message_limit"]:
400
- print(
401
- f"'header' or 'message' is more than specified characters in response on attempt {attempt + 1}. Retrying...")
402
- continue
403
- else:
404
- return output
405
-
406
- except json.JSONDecodeError:
407
- print(f"Invalid JSON from LLM on attempt {attempt + 1}. Retrying...")
408
- except Exception as parse_error:
409
- print("Error processing output:", parse_error)
410
-
411
- print("Max retries exceeded. Returning empty response.")
412
- return None
413
-
414
- # ======================================================================
415
 
416
  def preprocess_and_parse_json(self, response: str):
417
  """
418
  Remove <think> blocks, extract JSON (from ```json fences or first {...} block),
419
- and parse. Includes a repair pass to handle common LLM issues like trailing commas.
 
 
420
  """
421
 
422
  def extract_json(text: str) -> str:
@@ -448,45 +275,153 @@ class LLM:
448
  # Remove commas before } or ]
449
  return re.sub(r',(\s*[}\]])', r'\1', text)
450
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
451
  raw = extract_json(response)
452
  raw = normalize_quotes(raw)
453
 
454
- try:
455
- return json.loads(raw)
456
- except json.JSONDecodeError:
 
 
 
 
 
457
  # Repair pass
458
  repaired = strip_comments(raw)
459
- repaired = remove_trailing_commas(repaired)
460
- repaired = repaired.strip()
461
 
462
- try:
463
- return json.loads(repaired)
464
- except json.JSONDecodeError as e:
465
- print(f"Failed to parse JSON: {e}")
466
- # print('Offending text:', repaired)
467
- return None
468
 
 
 
 
 
 
 
 
469
  # ===============================================================
470
- def preprocess_and_parse_json_claude(self, response: str):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
471
  """
472
- Process Claude response and extract JSON content safely
 
473
  """
474
- try:
475
- json_start = response.index("{")
476
- json_end = response.rfind("}")
477
- json_string = response[json_start:json_end + 1]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
478
 
479
- parsed_response = json.loads(json_string)
480
 
481
- if not isinstance(parsed_response, dict):
482
- raise ValueError(f"Parsed response is not a dict: {parsed_response}")
483
 
484
- return parsed_response
485
 
486
- except ValueError as ve:
487
- raise ValueError(f"Could not extract JSON from Claude response: {ve}\nOriginal response: {response}")
488
- except json.JSONDecodeError as je:
489
- raise ValueError(f"Failed to parse JSON from string: {json_string}\nError: {je}")
490
 
491
 
492
 
 
3
  """
4
 
5
  import json
 
6
  from openai import OpenAI
7
  import openai
 
8
  import re
 
9
  import os
 
10
  from google.genai import types
11
  from google import genai
12
+ import logging
13
+ logger = logging.getLogger()
14
 
15
 
16
 
 
18
  def __init__(self, Core):
19
  self.Core = Core
20
  self.model = None
21
+ self.model_type = "openai"
22
  self.client = None
23
+
24
+ # failure tracking + cached model lists
25
+ self.failure_counts = {} # {"gemini-2.5-flash": 2, ...}
26
+ # self.google_models = self.Core.config_file.get("google_models", [])
27
+ self.google_models= ["gemini-2.5-flash-lite", "gemini-2.5-flash"]
28
+ self.openai_models = self.Core.config_file.get("openai_models", [])
29
+ self.openai_fallback_model = self.Core.config_file.get("openai_fallback_models", "gpt-5-nano")
30
+ self.failure_threshold = self.Core.config_file.get("model_failure_threshold", 3)
31
  self.connect_to_llm()
32
 
33
  def get_credential(self, key):
34
+ return os.getenv(key)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
+ def get_response(self, prompt, instructions, validation_mode="message_generation"):
37
+ """
38
+ Get response from LLM with specified validation mode.
39
+
40
+ Args:
41
+ prompt: The prompt to send to the LLM
42
+ instructions: System instructions for the LLM
43
+ validation_mode: Type of validation to perform on the response
44
+ - "message_generation": Validates header/message keys and character limits
45
+ - "validation_response": Validates approved/issues/feedback keys
46
+ - "generic_json": Only validates that it's valid JSON, no specific keys required
47
+
48
+ Returns:
49
+ Parsed JSON response or None if all retries failed
50
+ """
51
+ if self.model_type == "google":
52
+ return self._get_response_google_with_fallback(prompt, instructions, validation_mode)
53
+ elif self.model_type == "openai":
54
+ response = self.get_message_openai(prompt, instructions, validation_mode=validation_mode)
55
+ return response
56
+ else:
57
+ raise RuntimeError(f"Invalid model type : {self.model_type}")
58
 
59
  def connect_to_llm(self):
60
  """
 
65
  if self.Core.model in self.Core.config_file["openai_models"]:
66
  self.model_type = "openai"
67
 
 
 
 
68
  elif self.Core.model in self.Core.config_file["google_models"]:
69
  self.model_type = "google"
70
 
 
 
 
 
 
 
 
 
 
 
71
  self.model = self.Core.model
72
 
73
+ def _validate_response(self, output: dict, validation_mode: str, attempt: int) -> tuple:
 
 
 
74
  """
75
+ Validate LLM response based on validation mode.
76
 
77
+ Args:
78
+ output: Parsed JSON output from LLM
79
+ validation_mode: Type of validation to perform
80
+ attempt: Current attempt number (for logging)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
+ Returns:
83
+ Tuple of (is_valid: bool, error_message: str or None)
84
+ """
85
+ if validation_mode == "message_generation":
86
+ # Validate header/message keys and character limits
87
+ if 'message' not in output or 'header' not in output:
88
+ return False, f"'message' or 'header' is missing in response on attempt {attempt + 1}. Retrying..."
89
 
90
+ header_len = len(output["header"].strip())
91
+ message_len = len(output["message"].strip())
92
+ header_limit = self.Core.config_file["header_limit"]
93
+ message_limit = self.Core.config_file["message_limit"]
94
 
95
+ if header_len > header_limit or message_len > message_limit:
96
+ return False, f"'header' ({header_len}/{header_limit}) or 'message' ({message_len}/{message_limit}) exceeds character limit on attempt {attempt + 1}. Retrying..."
 
97
 
98
+ return True, None
 
 
 
 
 
99
 
100
+ elif validation_mode == "validation_response":
101
+ # Validate approved/issues/feedback keys for SecurityAgent
102
+ if 'approved' not in output:
103
+ return False, f"'approved' key is missing in validation response on attempt {attempt + 1}. Retrying..."
104
+ return True, None
105
 
106
+ elif validation_mode == "generic_json":
107
+ # No specific key validation, just ensure it's valid JSON (already parsed)
108
+ return True, None
109
 
110
+ else:
111
+ raise ValueError(f"Unknown validation_mode: {validation_mode}")
 
 
 
 
 
 
 
 
 
 
112
 
113
  # =========================================================================
114
+ def get_message_google(self, prompt, instructions, max_retries=6, model_override=None, validation_mode="message_generation"):
115
 
116
+ client = genai.Client(api_key=self.get_credential("GOOGLE_API_KEY"))
117
+ active_model = model_override or self.Core.model
118
 
119
  for attempt in range(max_retries):
120
  try:
121
  response = client.models.generate_content(
122
+ model=active_model,
123
  contents=prompt,
124
  config=types.GenerateContentConfig(
125
  thinking_config=types.ThinkingConfig(thinking_budget=0),
 
128
  response_mime_type="application/json"
129
  ))
130
 
131
+ # output = json.loads(str(response.text))
132
  tokens = {
133
  'prompt_tokens': response.usage_metadata.prompt_token_count,
134
  'completion_tokens': response.usage_metadata.candidates_token_count,
135
  'total_tokens': response.usage_metadata.total_token_count
136
  }
137
 
138
+ # Update token counts
139
  self.Core.total_tokens['prompt_tokens'] += tokens['prompt_tokens']
140
  self.Core.total_tokens['completion_tokens'] += tokens['completion_tokens']
141
+ self.Core.temp_token_counter += tokens['prompt_tokens']
142
 
143
  output = self.preprocess_and_parse_json(response.text)
 
144
 
145
+ # Validate based on mode
146
+ is_valid, error_msg = self._validate_response(output, validation_mode, attempt)
147
+ if not is_valid:
148
+ print(error_msg)
149
+ continue
150
 
 
 
 
 
 
 
 
 
 
 
151
  return output
152
 
153
  except json.JSONDecodeError:
 
160
 
161
  # =========================================================================
162
 
163
+ def get_message_openai(self, prompt, instructions, max_retries=5, validation_mode="message_generation"):
164
  """
165
  sending the prompt to openai LLM and get back the response
166
  """
167
 
168
+ openai.api_key = self.get_credential('OPENAI_API_KEY')
169
+ client = OpenAI(api_key=self.get_credential('OPENAI_API_KEY'))
170
 
171
  for attempt in range(max_retries):
172
  try:
 
200
  'total_tokens': response.usage.total_tokens
201
  }
202
 
203
+ self.Core.temp_token_counter += tokens['prompt_tokens']
 
 
 
204
 
205
  try:
206
  content = response.choices[0].message.content
207
 
208
  # Extract JSON code block
 
209
  output = json.loads(content)
210
 
211
+ # Validate based on mode
212
+ is_valid, error_msg = self._validate_response(output, validation_mode, attempt)
213
+ if not is_valid:
214
+ print(error_msg)
215
+ continue
 
 
 
 
 
216
 
217
+ # validating the JSON
218
+ self.Core.total_tokens['prompt_tokens'] += tokens['prompt_tokens']
219
+ self.Core.total_tokens['completion_tokens'] += tokens['completion_tokens']
220
+ self.Core.temp_token_counter += tokens['total_tokens']
221
  return output
222
 
223
  except json.JSONDecodeError:
 
236
  print("Max retries exceeded. Returning empty response.")
237
  return None
238
 
239
+ # ===============================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
 
241
  def preprocess_and_parse_json(self, response: str):
242
  """
243
  Remove <think> blocks, extract JSON (from ```json fences or first {...} block),
244
+ parse with a small repair pass, then sanitize string values to avoid double quotes
245
+ inside strings (convert " to ' so serialized output won't need escapes), and also
246
+ strip trailing commas from string values.
247
  """
248
 
249
  def extract_json(text: str) -> str:
 
275
  # Remove commas before } or ]
276
  return re.sub(r',(\s*[}\]])', r'\1', text)
277
 
278
+ # ---- New: value-level sanitizer ----
279
+ def sanitize_strings(obj):
280
+ """
281
+ Recursively walk obj; for any string value:
282
+ - replace double quotes with single quotes
283
+ - remove trailing commas
284
+ Keys are left unchanged.
285
+ """
286
+ if isinstance(obj, str):
287
+ # Normalize curly quotes to straight
288
+ s = obj.replace('“', '"').replace('”', '"')
289
+ # Convert any " to '
290
+ if '"' in s:
291
+ s = s.replace('"', "'")
292
+ # Strip trailing commas/spaces
293
+ s = s.rstrip(" ,")
294
+ return s
295
+ elif isinstance(obj, list):
296
+ return [sanitize_strings(v) for v in obj]
297
+ elif isinstance(obj, tuple):
298
+ return tuple(sanitize_strings(v) for v in obj)
299
+ elif isinstance(obj, dict):
300
+ return {k: sanitize_strings(v) for k, v in obj.items()}
301
+ else:
302
+ return obj
303
+
304
  raw = extract_json(response)
305
  raw = normalize_quotes(raw)
306
 
307
+ def _try_parse(s: str):
308
+ try:
309
+ return json.loads(s)
310
+ except json.JSONDecodeError:
311
+ return None
312
+
313
+ parsed = _try_parse(raw)
314
+ if parsed is None:
315
  # Repair pass
316
  repaired = strip_comments(raw)
317
+ repaired = remove_trailing_commas(repaired).strip()
318
+ parsed = _try_parse(repaired)
319
 
320
+ if parsed is None:
321
+ # Last resort: fail closed
322
+ return None
 
 
 
323
 
324
+ # Post-parse sanitization: remove double quotes + trailing commas inside string values
325
+ sanitized = sanitize_strings(parsed)
326
+ return sanitized
327
+
328
+ # ===============================================================
329
+ # Helper functions for fall back strategy
330
+ # shifting to other models in case of failure more than expected
331
  # ===============================================================
332
+ def _record_success(self, model_name: str):
333
+ self.failure_counts[model_name] = 0
334
+
335
+ # ========================================================
336
+ def _record_failure(self, model_name: str):
337
+ self.failure_counts[model_name] = self.failure_counts.get(model_name, 0) + 1
338
+
339
+ # ========================================================
340
+ def _should_promote(self, model_name: str):
341
+ return self.failure_counts.get(model_name, 0) >= self.failure_threshold
342
+
343
+ # ========================================================
344
+ def _next_google_model(self, current: str):
345
+ if not self.google_models:
346
+ return None
347
+ if current not in self.google_models:
348
+ return self.google_models[0]
349
+ idx = self.google_models.index(current)
350
+ return self.google_models[(idx + 1) % len(self.google_models)]
351
+
352
+ # ========================================================
353
+ def _promote_google_model(self, new_model: str):
354
+ """Permanently switch default to new_model (Google)."""
355
+ if new_model and new_model in self.google_models:
356
+ self.Core.model = new_model
357
+ self.model = new_model
358
+ self.model_type = "google"
359
+ # reset its counter so we don't immediately bounce again
360
+ self.failure_counts[new_model] = 0
361
+ # (optional) log
362
+ print(f"[LLM] Permanently switched default Google model to: {new_model}")
363
+ #========================================================
364
+ def _get_response_google_with_fallback(self, prompt, instructions, validation_mode="message_generation"):
365
  """
366
+ Try current Google model; if it fails, walk through the rest of google_models just for THIS call.
367
+ If current model crosses threshold, permanently promote the next model.
368
  """
369
+ if self.Core.openai_fallback_enabled is True:
370
+ response = self.get_message_openai(prompt, instructions, validation_mode=validation_mode)
371
+ return response
372
+
373
+ elif self.Core.openai_fallback_enabled is False:
374
+ if not self.google_models:
375
+ raise RuntimeError("No Google models configured.")
376
+
377
+ current = self.Core.model if self.Core.model in self.google_models else self.google_models[0]
378
+
379
+ # Build per-request trial order: current first, then the rest in ring order
380
+ start_idx = self.google_models.index(current)
381
+ trial_order = self.google_models[start_idx:] + self.google_models[:start_idx]
382
+
383
+ original_model = current
384
+
385
+ for idx, model_name in enumerate(trial_order):
386
+ output = self.get_message_google(
387
+ prompt,
388
+ instructions,
389
+ max_retries=self.Core.config_file.get("per_model_retries", 6),
390
+ model_override=model_name,
391
+ validation_mode=validation_mode
392
+ )
393
+
394
+ if output is not None:
395
+ # success path
396
+ self._record_success(model_name)
397
+
398
+ # If we had to fallback away from the original model, count that as a failure for it.
399
+ if idx > 0:
400
+ self._record_failure(original_model)
401
+ if self._should_promote(original_model):
402
+ self._promote_google_model(model_name)
403
+
404
+ return output
405
+
406
+ # failed for this model_name
407
+ self._record_failure(model_name)
408
+ logger.info(f"Google model '{model_name}' failed after retries.")
409
+
410
+ # All google models fail --> Fall back to openai model
411
+ self.Core.openai_fallback_enabled = True
412
+ self.Core.model = self.openai_fallback_model
413
+ self.Core.reasoning_model = True #fall back model is a reasoning model --> gpt5
414
+ logger.info(" ❌ [LLM] All Google models failed; attempting OpenAI fallback.")
415
+ print()
416
+ return self._get_response_google_with_fallback(prompt, instructions, validation_mode)
417
+ else:
418
+ # Nothing worked
419
+ logger.info(" ❌ All Google models and Openai fallback failed.")
420
+ raise RuntimeError("All Google models and Openai fallback failed.")
421
 
 
422
 
 
 
423
 
 
424
 
 
 
 
 
425
 
426
 
427
 
{Messaging_system → ai_messaging_system_v2/Messaging_system}/LLMR.py RENAMED
@@ -3,17 +3,16 @@ This class is a LLM based recommender that can choose the perfect content for th
3
 
4
  """
5
  import json
6
- import os
7
- import random
8
-
9
  import pandas as pd
10
  import openai
11
  from openai import OpenAI
12
  from dotenv import load_dotenv
13
  import time
14
- import streamlit as st
15
  from tqdm import tqdm
16
- from Messaging_system.Homepage_Recommender import DefaultRec
 
 
 
17
  load_dotenv()
18
 
19
 
@@ -27,7 +26,7 @@ class LLMR:
27
  self.selected_content_ids = [] # will be populated for each user
28
  self.random=random
29
 
30
- def get_recommendations(self, progress_callback):
31
  """
32
  selecting the recommended content for each user
33
  :return:
@@ -38,7 +37,7 @@ class LLMR:
38
  self.Core.users_df["recommendation_info"] = None
39
  total_users = len(self.Core.users_df)
40
 
41
- st.write("Choosing the best content to recommend ... ")
42
 
43
  self.Core.start_time = time.time()
44
  for progress, (idx, row) in enumerate(
@@ -77,6 +76,10 @@ class LLMR:
77
  :return: content_id
78
  """
79
 
 
 
 
 
80
  if self.random: # select recommendations randomly from top options
81
  return self._get_recommendation_random()
82
 
@@ -401,62 +404,122 @@ You are a helpful educational music content recommender. Your goal is to choose
401
  # ==========================================================================
402
  # Randomly select recommendations from top options
403
  # ==========================================================================
404
- # main random selector ---
405
  def _get_recommendation_random(self):
406
  """
407
- Randomly pick ONE item from the top-5 of each requested section.
 
408
  Also remove the picked item from every section in recsys_json.
409
- Returns: (content_id, content_info, updated_recsys_json, zero_tokens_dict)
410
  """
411
- # 1) Get user's recsys_result or fall back
 
 
412
  recsys_json = self._get_user_recommendation()
413
  try:
414
  recsys_data = json.loads(recsys_json) if recsys_json else {}
415
  except Exception:
416
  recsys_data = {}
417
 
418
- # 2) Build candidate pool (top 5 per section)
419
  sections = self.Core.recsys_contents
420
- candidates = self._collect_top_k(recsys_data, sections, k=5)
 
 
421
 
422
  # 3) Cold start or empty? -> use popular contents
423
- if not candidates:
 
424
  recsys_data = self._get_popular_fallback_json(k=5)
425
- candidates = self._collect_top_k(recsys_data, sections, k=5)
 
426
 
427
  # Still nothing? bail out
428
- if not candidates:
429
  return None, None, None, None
430
 
431
- # 4) Deduplicate by content_id, then pick randomly
432
- seen = set()
433
- unique_candidates = []
434
- for rec in candidates:
435
- cid = rec.get("content_id")
436
- if cid not in seen:
437
- seen.add(cid)
438
- unique_candidates.append(rec)
439
 
440
- picked_rec = random.choice(unique_candidates)
441
- picked_id = picked_rec["content_id"]
442
- recommendation_dict = self._get_recommendation_info(picked_id, recsys_data)
443
 
444
- # 5) Remove picked_id from ALL sections and store back
445
- recsys_data = self._remove_selected_from_all(recsys_data, picked_id)
 
 
 
 
 
 
 
 
 
 
446
 
 
 
447
 
448
- # 6) Track available ids if you still need it elsewhere
449
- self.selected_content_ids = [r["content_id"] for r in unique_candidates]
450
 
451
- # 7) Prepare return values
452
- content_info = self._get_content_info(picked_id)
453
  updated_json = json.dumps(recsys_data)
454
  zero_tokens = {"prompt_tokens": 0, "completion_tokens": 0}
455
 
456
  return recommendation_dict, content_info, updated_json, zero_tokens
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
457
  #======================================================================
458
  # helpers used by the random path
459
  #======================================================================
 
460
  def _get_recommendation_info(self, content_id, recsys_data):
461
  # Search through all categories in the recsys data
462
  found_item=None
@@ -536,3 +599,94 @@ You are a helpful educational music content recommender. Your goal is to choose
536
  recsys_data[sec] = [r for r in recs if r.get("content_id") != content_id]
537
  return recsys_data
538
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  """
5
  import json
 
 
 
6
  import pandas as pd
7
  import openai
8
  from openai import OpenAI
9
  from dotenv import load_dotenv
10
  import time
 
11
  from tqdm import tqdm
12
+ from .Homepage_Recommender import DefaultRec
13
+ import logging
14
+
15
+ logger = logging.getLogger()
16
  load_dotenv()
17
 
18
 
 
26
  self.selected_content_ids = [] # will be populated for each user
27
  self.random=random
28
 
29
+ def get_recommendations(self, progress_callback=None):
30
  """
31
  selecting the recommended content for each user
32
  :return:
 
37
  self.Core.users_df["recommendation_info"] = None
38
  total_users = len(self.Core.users_df)
39
 
40
+ logger.info("🎯 Choosing the best content to recommend ...")
41
 
42
  self.Core.start_time = time.time()
43
  for progress, (idx, row) in enumerate(
 
76
  :return: content_id
77
  """
78
 
79
+ # NEW: Check if specific_content_id is set - if so, use it for all users
80
+ if self.Core.specific_content_id is not None:
81
+ return self._get_specific_content()
82
+
83
  if self.random: # select recommendations randomly from top options
84
  return self._get_recommendation_random()
85
 
 
404
  # ==========================================================================
405
  # Randomly select recommendations from top options
406
  # ==========================================================================
 
407
  def _get_recommendation_random(self):
408
  """
409
+ Randomly pick ONE valid item from the top-5 of each requested section.
410
+ If the first random pick is missing/invalid, keep trying other candidates.
411
  Also remove the picked item from every section in recsys_json.
412
+ Returns: (recommendation_dict, content_info, updated_recsys_json, zero_tokens_dict)
413
  """
414
+ import json, random
415
+
416
+ # 1) Get user's recsys_result or fall back to {}
417
  recsys_json = self._get_user_recommendation()
418
  try:
419
  recsys_data = json.loads(recsys_json) if recsys_json else {}
420
  except Exception:
421
  recsys_data = {}
422
 
 
423
  sections = self.Core.recsys_contents
424
+
425
+ # 2) Primary candidate set
426
+ unique_candidates = self.build_unique_candidates(recsys_data, sections)
427
 
428
  # 3) Cold start or empty? -> use popular contents
429
+ used_popular_fallback = False
430
+ if not unique_candidates:
431
  recsys_data = self._get_popular_fallback_json(k=5)
432
+ unique_candidates = self.build_unique_candidates(recsys_data, sections)
433
+ used_popular_fallback = True
434
 
435
  # Still nothing? bail out
436
+ if not unique_candidates:
437
  return None, None, None, None
438
 
439
+ # 4) Try candidates in random order until a valid one is found
440
+ idxs = list(range(len(unique_candidates)))
441
+ random.shuffle(idxs)
 
 
 
 
 
442
 
443
+ picked_id, recommendation_dict, content_info = self.try_pick_from_candidates(idxs, unique_candidates, recsys_data)
 
 
444
 
445
+ # 5) If nothing valid from primary set, and we haven't tried popular fallback yet, try it now
446
+ if picked_id is None and not used_popular_fallback:
447
+ recsys_data = self._get_popular_fallback_json(k=5)
448
+ unique_candidates = self.build_unique_candidates(recsys_data, sections)
449
+ if unique_candidates:
450
+ idxs = list(range(len(unique_candidates)))
451
+ random.shuffle(idxs)
452
+ picked_id, recommendation_dict, content_info = self.try_pick_from_candidates(idxs, unique_candidates, recsys_data)
453
+
454
+ # 6) If still nothing, bail out
455
+ if picked_id is None:
456
+ return None, None, None, None
457
 
458
+ # 7) Remove picked_id from ALL sections and store back
459
+ recsys_data = self._remove_selected_from_all(recsys_data, picked_id)
460
 
461
+ # 8) Track available ids if you still need it elsewhere
462
+ self.selected_content_ids = [r["content_id"] for r in unique_candidates if r.get("content_id")]
463
 
464
+ # 9) Prepare return values
 
465
  updated_json = json.dumps(recsys_data)
466
  zero_tokens = {"prompt_tokens": 0, "completion_tokens": 0}
467
 
468
  return recommendation_dict, content_info, updated_json, zero_tokens
469
+
470
+ # ====================================================================
471
+ def build_unique_candidates(self,src_data, sections):
472
+ # Build candidate pool (top 5 per section) and dedupe by content_id
473
+ cands = self._collect_top_k(src_data, sections, k=5)
474
+ seen, uniq = set(), []
475
+ for rec in cands or []:
476
+ cid = rec.get("content_id")
477
+ if cid and cid not in seen:
478
+ seen.add(cid)
479
+ uniq.append(rec)
480
+ return uniq
481
+
482
+ # ======================================================================
483
+ def try_pick_from_candidates(self, idxs, candidates, source_data):
484
+ """
485
+ Iterate candidates in random order, returning the first valid pick:
486
+ (picked_id, recommendation_dict, content_info) or (None, None, None)
487
+ """
488
+ banned_contents = set(self.Core.config_file.get("banned_contents", [])) # use set for faster lookup
489
+
490
+ for i in idxs:
491
+ rec = candidates[i]
492
+ picked_id = rec.get("content_id")
493
+ if not picked_id:
494
+ continue
495
+ # Skip if content is banned
496
+ if picked_id in banned_contents:
497
+ continue
498
+ try:
499
+ # Validate we can fetch both info payloads
500
+ content_info = self._get_content_info(picked_id)
501
+ if not content_info:
502
+ # Treat falsy/empty as invalid and keep searching
503
+ continue
504
+
505
+ recommendation_dict = self._get_recommendation_info(picked_id, source_data)
506
+ # If both succeed, we have a winner
507
+ return picked_id, recommendation_dict, content_info
508
+
509
+ except IndexError:
510
+ # Your reported failure mode; skip this candidate
511
+ continue
512
+ except KeyError:
513
+ continue
514
+ except Exception:
515
+ # Any unexpected data issue: skip and try the next
516
+ continue
517
+ return None, None, None
518
+
519
  #======================================================================
520
  # helpers used by the random path
521
  #======================================================================
522
+ # =====================================================================
523
  def _get_recommendation_info(self, content_id, recsys_data):
524
  # Search through all categories in the recsys data
525
  found_item=None
 
599
  recsys_data[sec] = [r for r in recs if r.get("content_id") != content_id]
600
  return recsys_data
601
 
602
+ # =====================================================================
603
+ def _get_specific_content(self):
604
+ """
605
+ Get a specific content for all users when specific_content_id is set.
606
+ This overrides the AI recommendation system.
607
+
608
+ Returns:
609
+ tuple: (recommendation_dict, content_info, recsys_json, zero_tokens)
610
+
611
+ Raises:
612
+ ValueError: If specific_content_id is not found in content_info
613
+ """
614
+ import json
615
+
616
+ specific_content_id = self.Core.specific_content_id
617
+
618
+ # Query content_info DataFrame for the specific content
619
+ content_row = self.Core.content_info[self.Core.content_info['content_id'] == specific_content_id]
620
+
621
+ if content_row.empty:
622
+ error_msg = f"❌ ERROR: specific_content_id {specific_content_id} not found in content database for brand '{self.Core.brand}'"
623
+ logger.error(error_msg)
624
+ raise ValueError(error_msg)
625
+
626
+ # Extract content details from DataFrame
627
+ try:
628
+ content_info = content_row['content_info'].iloc[0]
629
+ web_url_path = content_row['web_url_path'].iloc[0]
630
+ thumbnail_url = content_row['thumbnail_url'].iloc[0]
631
+ title = content_row['content_title'].iloc[0] # Map content_title to title
632
+
633
+ # Build recommendation dict in the same format as current system
634
+ recommendation_dict = {
635
+ "content_id": specific_content_id,
636
+ "web_url_path": web_url_path,
637
+ "title": title,
638
+ "thumbnail_url": thumbnail_url
639
+ }
640
+
641
+ # Build recsys_json for consistency (even though it's the same for all users)
642
+ recsys_data = {
643
+ "specific_content": [{
644
+ "content_id": specific_content_id,
645
+ "web_url_path": web_url_path,
646
+ "title": title,
647
+ "thumbnail_url": thumbnail_url,
648
+ "recommendation_rank": 1
649
+ }]
650
+ }
651
+ recsys_json = json.dumps(recsys_data)
652
+
653
+ # No tokens used since we're not calling LLM
654
+ zero_tokens = {"prompt_tokens": 0, "completion_tokens": 0}
655
+
656
+ logger.info(f"✅ Using specific content {specific_content_id} for all users: {title}")
657
+
658
+ return recommendation_dict, content_info, recsys_json, zero_tokens
659
+
660
+ except KeyError as e:
661
+ error_msg = f"❌ ERROR: Missing required field in content database for content_id {specific_content_id}: {str(e)}"
662
+ logger.error(error_msg)
663
+ raise ValueError(error_msg)
664
+ except IndexError as e:
665
+ error_msg = f"❌ ERROR: Unable to extract content data for content_id {specific_content_id}: {str(e)}"
666
+ logger.error(error_msg)
667
+ raise ValueError(error_msg)
668
+
669
+ # =====================================================================
670
+ def get_followup_recommendation(self):
671
+ """
672
+ get follow up recommendation for all users considering available contents
673
+ :return:
674
+ """
675
+ default = DefaultRec(self.Core)
676
+ for idx, row in self.Core.users_df.iterrows():
677
+ self.user = row
678
+ recommendation_dict, content_info, recsys_json, _ = self._get_recommendation()
679
+
680
+ if recommendation_dict["content_id"] is None: # error in selecting a content to recommend
681
+ self.Core.users_df.at[idx, "recommendation"] = default.recommendation
682
+ self.Core.users_df.at[idx, "recommendation_info"] = default.recommendation_info
683
+ self.Core.users_df.at[idx, "recsys_result"] = default.for_you_url
684
+
685
+ else:
686
+ self.Core.users_df.at[idx, "recommendation"] = recommendation_dict
687
+ self.Core.users_df.at[idx, "recommendation_info"] = content_info
688
+ self.Core.users_df.at[idx, "recsys_result"] = recsys_json
689
+
690
+ return self.Core
691
+
692
+
{Messaging_system → ai_messaging_system_v2/Messaging_system}/Message_generator.py RENAMED
@@ -1,16 +1,16 @@
1
  """
2
- THis class will generate message or messages based on the number of requested.
 
3
  """
4
  import json
5
  import time
6
  from openai import OpenAI
7
  from tqdm import tqdm
8
- import streamlit as st
9
-
10
- from Messaging_system.MultiMessage import MultiMessage
11
- from Messaging_system.protection_layer import ProtectionLayer
12
  import openai
13
- from Messaging_system.LLM import LLM
 
 
 
14
 
15
 
16
  class MessageGenerator:
@@ -18,84 +18,206 @@ class MessageGenerator:
18
  def __init__(self, CoreConfig):
19
  self.Core = CoreConfig
20
  self.llm = LLM(CoreConfig)
 
 
 
21
 
22
  # =================================================================
23
- # =================================================================
24
- def generate_messages(self, progress_callback):
 
 
25
  """
26
- generating messages based on prompts for each user
27
- :return: updating message column for each user
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  total_users = len(self.Core.users_df)
31
- st.write("Generating messages ... ")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  self.Core.start_time = time.time()
34
- for progress, (idx, row) in enumerate(tqdm(self.Core.users_df.iterrows(), desc="generating messages")):
35
- # if we have a prompt to generate a personalized message
36
  # Update progress if callback is provided
37
  if progress_callback is not None:
38
  progress_callback(progress, total_users)
39
 
40
- if row["prompt"] is not None:
41
- first_message = self.llm.get_response(prompt=row["prompt"], instructions=self.llm_instructions())
42
 
43
- if first_message is not None:
44
- # adding protection layer
45
- # protect = ProtectionLayer(CoreConfig=self.Core)
46
- # message, total_tokens = protect.criticize(message=first_message, user=row)
47
- message = first_message
48
 
 
 
 
 
 
49
 
50
- # updating tokens
51
- # self.Core.total_tokens['prompt_tokens'] += total_tokens['prompt_tokens']
52
- # self.Core.total_tokens['completion_tokens'] += total_tokens['completion_tokens']
53
- # self.Core.temp_token_counter += total_tokens['prompt_tokens'] + total_tokens['completion_tokens']
54
 
55
- # double check output structure
56
- if isinstance(message, dict) and "message" in message and isinstance(message["message"], str):
57
- # parsing output result
58
- message = self.parsing_output_message(message, row)
59
- self.Core.users_df.at[idx, "message"] = message
60
- row["message"] = message
61
- else:
62
  self.Core.users_df.at[idx, "message"] = None
63
- self.Core.checkpoint()
64
- self.Core.respect_request_ratio()
65
  else:
66
  self.Core.users_df.at[idx, "message"] = None
 
 
 
67
 
68
- # generating subsequence messages if needed:
69
- if isinstance(self.Core.subsequent_examples, dict) and len(self.Core.subsequent_examples.keys()) > 1 and \
70
- self.Core.users_df.at[idx, "message"] is not None and row["message"] is not None:
71
- MM = MultiMessage(self.Core)
72
- message = MM.generate_multi_messages(row)
73
- self.Core.users_df.at[idx, "message"] = message
74
 
75
- else:
76
- # ---------------------------------------------------------
77
- # SINGLE-MESSAGE path
78
- # ---------------------------------------------------------
79
- single_msg = row["message"] or self.Core.users_df.at[idx, "message"]
80
- if single_msg is not None:
81
- # If the single message is still a JSON string, turn it into a dict first
82
- if isinstance(single_msg, str):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  try:
84
- single_msg = json.loads(single_msg)
 
85
  except json.JSONDecodeError:
86
- # leave it as-is if it’s not valid JSON
87
- pass
88
-
89
- msg_wrapper = {"messages_sequence": [single_msg]}
90
- # Again, store a proper JSON string
91
- self.Core.users_df.at[idx, "message"] = json.dumps(msg_wrapper,
92
- ensure_ascii=False)
93
 
 
 
 
 
94
  else:
95
  self.Core.users_df.at[idx, "message"] = None
96
 
97
  return self.Core
98
-
99
  # --------------------------------------------------------------
100
  # --------------------------------------------------------------
101
  def parsing_output_message(self, message, user):
@@ -136,24 +258,31 @@ class MessageGenerator:
136
  def fetch_recommendation_data(self, user, message):
137
 
138
  if user["recommendation"] == "for_you":
 
 
 
139
  output_message = {
140
  "header": message.get("header"),
141
  "message": message.get("message"),
142
  "content_id": None,
143
- "web_url_path": user["recsys_result"],
144
  "title": user["recommendation"],
145
- "thumbnail_url": None
 
146
  }
147
  else:
148
  recommendation_dict = user["recommendation"]
149
  content_id = int(recommendation_dict["content_id"])
150
 
151
- # Extract required fields from found_item
152
  web_url_path = recommendation_dict["web_url_path"]
153
  title = recommendation_dict["title"]
154
  thumbnail_url = recommendation_dict["thumbnail_url"]
155
 
156
- message["message"].replace('\\', '').replace('"', '')
 
 
 
157
 
158
  # Add these to the message dict
159
  output_message = {
@@ -162,7 +291,8 @@ class MessageGenerator:
162
  "content_id": content_id,
163
  "web_url_path": web_url_path,
164
  "title": title,
165
- "thumbnail_url": thumbnail_url
 
166
  }
167
  return output_message
168
 
@@ -174,38 +304,25 @@ class MessageGenerator:
174
  :return: instructions as string
175
  """
176
 
177
- jargon_list = "\n".join(f"- {word}" for word in self.Core.config_file["AI_Jargon"])
178
- banned_phrases = self.Core.config_file.get(f"{self.Core.brand}_banned_phrases", None)
179
 
180
- instructions = f"""
181
- You are a copywriter. Your task is to write a 'header' and a 'message' as a push notification for a {self.Core.get_instrument()} student. It is critical that the message sounds like natural, everyday speech: friendly, concise, no jargon, and it must follow the instructions.
182
- Write a SUPER CASUAL and NATURAL push notification, as if you are chatting over coffee. Avoid odd phrasings. The message should sound like something that a {self.Core.get_instrument()} instructor would realistically say to a student in a daily conversation.
 
 
 
183
 
184
- """
185
- if self.Core.brand_voice is not None:
186
- instructions += f"""
187
- Here are some examples of things that an instructor would realistically say to a student, to give you a general sense of tone and phrasing:
188
-
189
- Common instructor phrases:
190
- {self.Core.brand_voice}
191
- """
192
-
193
-
194
- rules = f"""
195
- ABSOLUTE RULE – OVERRIDES EVERYTHING ELSE:
196
- the header and the message **MUST NOT** contain any banned word or phrases(case-insensitive; singular, plural, verb forms, or their derivatives)
197
- - **important Note:** header **must be** less than {self.Core.config_file["header_limit"]} characters and message **must be less** than {self.Core.config_file["message_limit"]} characters.
198
-
199
- Banned word:
200
  {jargon_list}
201
-
202
  """
203
- if banned_phrases is not None:
204
- rules += banned_phrases
205
 
206
- final_instructions = f"""
207
- {instructions}
208
- {rules}
209
- """
 
210
 
211
- return final_instructions
 
1
  """
2
+ This class will generate message or messages based on the number of requested.
3
+ Now uses agentic workflow for enhanced quality control.
4
  """
5
  import json
6
  import time
7
  from openai import OpenAI
8
  from tqdm import tqdm
 
 
 
 
9
  import openai
10
+ from .LLM import LLM
11
+ from .agents import AgentOrchestrator, RejectionLogger
12
+ import logging
13
+ logger = logging.getLogger()
14
 
15
 
16
  class MessageGenerator:
 
18
  def __init__(self, CoreConfig):
19
  self.Core = CoreConfig
20
  self.llm = LLM(CoreConfig)
21
+ self.use_agentic_workflow = True # Enable agentic workflow by default
22
+ self.agent_orchestrator = None
23
+ self.rejection_logger = None
24
 
25
  # =================================================================
26
+ def structured_output(self, msg, step):
27
+ """
28
+ creating final structured JSON output to store for the given step
29
+ :return:
30
  """
31
+
32
+ if msg is not None:
33
+ # If it's still a JSON string, turn it into a dict
34
+ if isinstance(msg, str):
35
+ try:
36
+ msg = json.loads(msg)
37
+ except json.JSONDecodeError:
38
+ raise ValueError(f"output is not a JSON string: \n {msg}") # Leave as-is if not valid JSON
39
+
40
+ # Build the new wrapper with step integer key
41
+ msg_wrapper = {
42
+ str(step): { # convert to str so it's valid JSON key
43
+ "header": msg.get("header"),
44
+ "message": msg.get("message"),
45
+ "content_id": msg.get("content_id"),
46
+ "web_url_path": msg.get("web_url_path"),
47
+ "title": msg.get("title"),
48
+ "thumbnail_url": msg.get("thumbnail_url"),
49
+ "deeplink" : msg.get("deeplink")
50
+ }
51
+ }
52
+ return msg_wrapper
53
+ else:
54
+ return None
55
+ # ==================================================================
56
+
57
+ def generate_messages(self, step=1, progress_callback=None):
58
  """
59
+ Generate messages using the agentic workflow (new default) or legacy method.
60
+
61
+ Args:
62
+ step: Campaign stage number
63
+ progress_callback: Optional progress callback function
64
+
65
+ Returns:
66
+ Updated CoreConfig instance
67
+ """
68
+ if self.use_agentic_workflow:
69
+ return self.generate_messages_with_agents(step, progress_callback)
70
+ else:
71
+ return self.generate_messages_legacy(step, progress_callback)
72
+
73
+ def generate_messages_with_agents(self, step=1, progress_callback=None):
74
+ """
75
+ Generate messages using the agentic workflow with GeneratorAgent and SecurityAgent.
76
+
77
+ Args:
78
+ step: Campaign stage number
79
+ progress_callback: Optional progress callback function
80
+
81
+ Returns:
82
+ Updated CoreConfig instance
83
+ """
84
+ # CRITICAL: Deduplicate users_df by user_id before processing
85
+ # This ensures we only generate ONE message per user, not multiple messages for duplicate rows
86
+ initial_count = len(self.Core.users_df)
87
+ user_id_col = 'user_id' if 'user_id' in self.Core.users_df.columns else 'USER_ID'
88
+
89
+ if user_id_col in self.Core.users_df.columns:
90
+ # Keep the first occurrence of each user_id
91
+ self.Core.users_df = self.Core.users_df.drop_duplicates(subset=[user_id_col], keep='first')
92
+ deduped_count = len(self.Core.users_df)
93
+
94
+ if initial_count > deduped_count:
95
+ logger.warning(f"⚠️ Removed {initial_count - deduped_count} duplicate user rows before message generation")
96
+ print(f"⚠️ Removed {initial_count - deduped_count} duplicate user rows before message generation")
97
 
98
  total_users = len(self.Core.users_df)
99
+ logger.info(f"Generating messages for {total_users} users using agentic workflow")
100
+
101
+ # Initialize rejection logger
102
+ campaign_name = getattr(self.Core, 'campaign_name', 'unknown_campaign')
103
+ self.rejection_logger = RejectionLogger(
104
+ campaign_name=campaign_name,
105
+ brand=self.Core.brand,
106
+ stage=step
107
+ )
108
+
109
+ # Initialize agent orchestrator
110
+ self.agent_orchestrator = AgentOrchestrator(
111
+ core_config=self.Core,
112
+ rejection_logger=self.rejection_logger
113
+ )
114
 
115
  self.Core.start_time = time.time()
116
+
117
+ for progress, (idx, row) in enumerate(tqdm(self.Core.users_df.iterrows(), desc="generating messages (agentic)")):
118
  # Update progress if callback is provided
119
  if progress_callback is not None:
120
  progress_callback(progress, total_users)
121
 
122
+ # Use agent orchestrator to generate and validate
123
+ result = self.agent_orchestrator.generate_and_validate_message(row, step)
124
 
125
+ if result is not None:
126
+ # Message was approved
127
+ header = result["header"]
128
+ message_text = result["message"]
 
129
 
130
+ # Create message dict for parsing
131
+ message_dict = {
132
+ "header": header,
133
+ "message": message_text
134
+ }
135
 
136
+ # Parse and enrich the message
137
+ parsed_message = self.parsing_output_message(message_dict, row)
 
 
138
 
139
+ if parsed_message:
140
+ final_message = self.structured_output(parsed_message, step)
141
+ try:
142
+ self.Core.users_df.at[idx, "message"] = json.dumps(final_message, ensure_ascii=False)
143
+ except json.JSONDecodeError:
 
 
144
  self.Core.users_df.at[idx, "message"] = None
 
 
145
  else:
146
  self.Core.users_df.at[idx, "message"] = None
147
+ else:
148
+ # Failed after all attempts
149
+ self.Core.users_df.at[idx, "message"] = None
150
 
151
+ # Respect rate limits
152
+ self.Core.respect_request_ratio()
 
 
 
 
153
 
154
+ # Log rejection statistics
155
+ if self.rejection_logger:
156
+ stats = self.rejection_logger.get_rejection_stats()
157
+ logger.info(f"Rejection stats: {stats}")
158
+ logger.info(f"Rejection log saved to: {self.rejection_logger.get_log_path()}")
159
+
160
+ return self.Core
161
+
162
+ def generate_messages_legacy(self, step=1, progress_callback=None):
163
+ """
164
+ Legacy message generation method (original implementation).
165
+
166
+ Args:
167
+ step: Campaign stage number
168
+ progress_callback: Optional progress callback function
169
+
170
+ Returns:
171
+ Updated CoreConfig instance
172
+ """
173
+ # CRITICAL: Deduplicate users_df by user_id before processing
174
+ # This ensures we only generate ONE message per user, not multiple messages for duplicate rows
175
+ initial_count = len(self.Core.users_df)
176
+ user_id_col = 'user_id' if 'user_id' in self.Core.users_df.columns else 'USER_ID'
177
+
178
+ if user_id_col in self.Core.users_df.columns:
179
+ # Keep the first occurrence of each user_id
180
+ self.Core.users_df = self.Core.users_df.drop_duplicates(subset=[user_id_col], keep='first')
181
+ deduped_count = len(self.Core.users_df)
182
+
183
+ if initial_count > deduped_count:
184
+ logger.warning(f"⚠️ Removed {initial_count - deduped_count} duplicate user rows before message generation")
185
+ print(f"⚠️ Removed {initial_count - deduped_count} duplicate user rows before message generation")
186
+
187
+ total_users = len(self.Core.users_df)
188
+ logger.info("⏳ generating messages for {} users (legacy mode)".format(total_users))
189
+
190
+ self.Core.start_time = time.time()
191
+ for progress, (idx, row) in enumerate(tqdm(self.Core.users_df.iterrows(), desc="generating messages")):
192
+ # if we have a prompt to generate a personalized message
193
+ # Update progress if callback is provided
194
+ if progress_callback is not None:
195
+ progress_callback(progress, total_users)
196
+
197
+ if row["prompt"] is not None:
198
+ message = self.llm.get_response(prompt=row["prompt"], instructions=self.llm_instructions())
199
+
200
+ if message is not None:
201
+
202
+ # double check output structure
203
+ if isinstance(message, dict) and "message" in message and isinstance(message["message"], str):
204
+ # parsing output result
205
+ message = self.parsing_output_message(message, row)
206
+ final_message = self.structured_output(message, step)
207
  try:
208
+ self.Core.users_df.at[idx, "message"] = json.dumps(final_message, ensure_ascii=False)
209
+ row["message"] = final_message
210
  except json.JSONDecodeError:
211
+ self.Core.users_df.at[idx, "message"] = None
 
 
 
 
 
 
212
 
213
+ else:
214
+ self.Core.users_df.at[idx, "message"] = None
215
+ # self.Core.checkpoint()
216
+ self.Core.respect_request_ratio()
217
  else:
218
  self.Core.users_df.at[idx, "message"] = None
219
 
220
  return self.Core
 
221
  # --------------------------------------------------------------
222
  # --------------------------------------------------------------
223
  def parsing_output_message(self, message, user):
 
258
  def fetch_recommendation_data(self, user, message):
259
 
260
  if user["recommendation"] == "for_you":
261
+ web_url_path = user["recsys_result"]
262
+ deeplink = web_url_path.replace("https://www.musora.com", "musora:/")
263
+
264
  output_message = {
265
  "header": message.get("header"),
266
  "message": message.get("message"),
267
  "content_id": None,
268
+ "web_url_path": web_url_path,
269
  "title": user["recommendation"],
270
+ "thumbnail_url": None,
271
+ "deeplink": deeplink
272
  }
273
  else:
274
  recommendation_dict = user["recommendation"]
275
  content_id = int(recommendation_dict["content_id"])
276
 
277
+ # Extract required fields
278
  web_url_path = recommendation_dict["web_url_path"]
279
  title = recommendation_dict["title"]
280
  thumbnail_url = recommendation_dict["thumbnail_url"]
281
 
282
+ # Clean up message text (although this doesn’t change `message` in place)
283
+ message["message"] = message["message"].replace('\\', '').replace('"', '')
284
+
285
+ deeplink = web_url_path.replace("https://www.musora.com", "musora:/")
286
 
287
  # Add these to the message dict
288
  output_message = {
 
291
  "content_id": content_id,
292
  "web_url_path": web_url_path,
293
  "title": title,
294
+ "thumbnail_url": thumbnail_url,
295
+ "deeplink": deeplink
296
  }
297
  return output_message
298
 
 
304
  :return: instructions as string
305
  """
306
 
307
+ banned_phrases = "\n".join(f"- {word}" for word in self.Core.config_file["AI_Jargon"])
308
+ jargon_list = "\n".join(f"- {word}" for word in self.Core.config_file[f"AI_phrases_{self.Core.brand}"])
309
 
310
+ if self.Core.personalization:
311
+ instructions = f"""
312
+ Your task is to select the best 'header' and a 'message' for a {self.Core.get_instrument()} student as a push notification.
313
+ Based on the user instructions, you might need to **modify the selected option** very minimal and slightly to improve personalization if capable while preserving the original brand voice, tone, rhythm, and structure.
314
+ **Important Note**: header < {self.Core.config_file["header_limit"]} and message < {self.Core.config_file["message_limit"]} characters.
315
+ **Important Note**: NEVER use time-related words (“new,” “recent,” “latest,” etc.) and NEVER imply recency in any way.
316
 
317
+ # Don't use below phrases, words, or similar variations of them:
318
+ {banned_phrases}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
319
  {jargon_list}
 
320
  """
 
 
321
 
322
+ else:
323
+ instructions = f"""
324
+ Your task is to select the best 'header' and a 'message' for a {self.Core.get_instrument()} student as a push notification.
325
+ DO NOT **change** or **modify** or **add to** the selected option in any shape or form. **Use the exact original selected header and message without ANY change**
326
+ """
327
 
328
+ return instructions
ai_messaging_system_v2/Messaging_system/Permes.py ADDED
@@ -0,0 +1,412 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ the flow of the Program starts from create_personalized_message function
3
+ """
4
+
5
+
6
+
7
+ import pandas as pd
8
+ import os
9
+ from pathlib import Path
10
+ from .DataCollector import DataCollector
11
+ from .CoreConfig import CoreConfig
12
+ from .LLMR import LLMR
13
+ from .Message_generator import MessageGenerator
14
+ from .PromptGenerator import PromptGenerator
15
+ try:
16
+ from ..database import DatabaseManager
17
+ except ImportError:
18
+ from pathlib import Path
19
+ import sys
20
+ sys.path.append(str(Path(__file__).parent.parent))
21
+ from database import DatabaseManager
22
+ from .Homepage_Recommender import DefaultRec
23
+ from datetime import datetime, timezone
24
+ import logging
25
+ logger = logging.getLogger()
26
+
27
+
28
+ class Permes:
29
+ """
30
+ LLM-based personalized message generator:
31
+ """
32
+
33
+ # UI mode constants
34
+ UI_OUTPUT_DIR = Path(__file__).parent.parent / "Data" / "ui_output"
35
+ UI_OUTPUT_FILE = "messages.csv"
36
+
37
+ def create_personalize_messages(self, session, users, brand, config_file,
38
+ platform="push", stage=1, test_mode=False, mode="production"
39
+ , recsys_contents=None, model=None, identifier_column="email", segment_info=None,
40
+ sample_example=None, number_of_samples=None, involve_recsys_result=True,
41
+ personalization=True, campaign_name="no_recent_activity",
42
+ campaign_instructions=None, per_message_instructions=None,
43
+ specific_content_id=None, ui_experiment_id=None):
44
+ """
45
+ :param campaign_name:
46
+ :param session: Snowflake session object
47
+ :param users: users dataframe
48
+ :param brand: brand name
49
+ :param config_file:
50
+ :param platform: push/app
51
+ :param stage: message number
52
+ :param test_mode: Boolean, if True uses test campaign name
53
+ :param mode: str, operating mode - "production", "test", or "ui"
54
+ :param recsys_contents: [course, quicktip, workout, song]
55
+ :param model: llm model name
56
+ :param identifier_column: email/user_id
57
+ :param segment_info: common info about users
58
+ :param sample_example: sample message
59
+ :param number_of_samples: number of messages to generate
60
+ :param involve_recsys_result: Boolean, recommend a content?
61
+ :param personalization: Boolean, personalized messages?
62
+ :param campaign_instructions: Optional campaign-wide instructions
63
+ :param per_message_instructions: Optional stage-specific instructions
64
+ :param specific_content_id: Optional content ID to force for all users
65
+ :param ui_experiment_id: Optional experiment ID for UI mode (e.g., 'messages_a_drumeo_20260111_1756')
66
+ :return:
67
+ """
68
+
69
+ # primary processing
70
+ users = self.identify_users(users_df=users, identifier_column=identifier_column)
71
+
72
+ personalize_message = CoreConfig(session=session,
73
+ users_df=users,
74
+ brand=brand,
75
+ platform=platform,
76
+ config_file=config_file)
77
+
78
+ personalize_message.set_segment_name(campaign_name=campaign_name)
79
+
80
+
81
+ if sample_example is not None: # Check if sample_example is not empty
82
+ personalize_message.set_sample_example(sample_example)
83
+
84
+ if number_of_samples is not None:
85
+ personalize_message.set_number_of_samples(number_of_samples)
86
+
87
+ if model is not None:
88
+ personalize_message.set_llm_model(model)
89
+
90
+ if segment_info is not None:
91
+ personalize_message.set_segment_info(segment_info)
92
+
93
+ if personalization:
94
+ personalize_message.set_personalization()
95
+
96
+ if involve_recsys_result:
97
+ personalize_message.set_messaging_mode("recsys_result")
98
+ personalize_message.set_involve_recsys_result(involve_recsys_result)
99
+
100
+ if recsys_contents is not None:
101
+ personalize_message.set_recsys_contents(recsys_contents)
102
+
103
+ # Set campaign and per-message instructions
104
+ if campaign_instructions is not None:
105
+ personalize_message.campaign_instructions = campaign_instructions
106
+ if per_message_instructions is not None:
107
+ personalize_message.per_message_instructions = per_message_instructions
108
+
109
+ # Set specific content ID for forcing specific content for all users
110
+ if specific_content_id is not None:
111
+ personalize_message.specific_content_id = specific_content_id
112
+
113
+ if stage == 1:
114
+ users_df = self._create_personalized_message(CoreConfig=personalize_message)
115
+ else:
116
+ users_df = self._create_followup_personalized_message(CoreConfig=personalize_message, stage=stage, test_mode=test_mode, mode=mode, ui_experiment_id=ui_experiment_id)
117
+
118
+ if users_df is None:
119
+ return users_df
120
+
121
+ total_prompt_tokens = personalize_message.total_tokens["prompt_tokens"]
122
+ total_completion_tokens = personalize_message.total_tokens["completion_tokens"]
123
+ total_cost = self.calculate_cost(total_prompt_tokens, total_completion_tokens, model)
124
+
125
+ logger.info(f"Estimated Cost (USD): {total_cost:.5f} ---> Number of messages: {(len(users_df))}")
126
+ print(f"Estimated Cost (USD): {total_cost:.5f} ---> Number of messages: {(len(users_df))}")
127
+
128
+ now_utc = datetime.now(timezone.utc)
129
+
130
+ # Create dataframe
131
+ message_cost = pd.DataFrame([{
132
+ "brand": brand,
133
+ "campaign_name": campaign_name,
134
+ "number_of_messages": len(users_df),
135
+ "model": model,
136
+ "stage": stage,
137
+ "total_prompt_tokens": total_prompt_tokens,
138
+ "total_completion_tokens": total_completion_tokens,
139
+ "total_cost": total_cost,
140
+ "timestamp": now_utc,
141
+ }])
142
+
143
+ snowflake_conn = DatabaseManager(session=session, brand=brand)
144
+ final_df = snowflake_conn.adjust_dataframe(users_df, stage)
145
+
146
+ # CRITICAL: Final deduplication by (user_id, stage) before storage
147
+ # This is a safety net to ensure no duplicates are ever written
148
+ initial_count = len(final_df)
149
+ user_id_col = 'user_id' if 'user_id' in final_df.columns else 'USER_ID'
150
+
151
+ if user_id_col in final_df.columns and 'stage' in final_df.columns:
152
+ # Keep the first occurrence of each (user_id, stage) pair
153
+ final_df = final_df.drop_duplicates(subset=[user_id_col, 'stage'], keep='first')
154
+ deduped_count = len(final_df)
155
+
156
+ if initial_count > deduped_count:
157
+ logger.warning(f"⚠️ Removed {initial_count - deduped_count} duplicate (user_id, stage) records before storage")
158
+ print(f"⚠️ Removed {initial_count - deduped_count} duplicate (user_id, stage) records before storage")
159
+
160
+ # UI mode: Store to local CSV instead of Snowflake
161
+ if mode == "ui":
162
+ self._store_to_csv_ui_mode(final_df, message_cost, ui_experiment_id)
163
+ snowflake_conn.close_connection()
164
+ else:
165
+ # Production/Test mode: Store to Snowflake
166
+ snowflake_conn.store_df_to_snowflake(table_name="initial_messages", dataframe=final_df,
167
+ database=None, schema=None, overwrite=False)
168
+
169
+ snowflake_conn.store_df_to_snowflake(table_name="message_cost", dataframe=message_cost,
170
+ database=None, schema=None, overwrite=False)
171
+ snowflake_conn.close_connection()
172
+
173
+ return users_df
174
+
175
+ #======================================================
176
+ def calculate_cost(self, total_prompt_tokens, total_completion_tokens, model):
177
+ input_price, output_price = self.get_model_price(model)
178
+
179
+ total_cost = ((total_prompt_tokens / 1000000) * input_price) + (
180
+ (total_completion_tokens / 1000000) * output_price) # Cost calculation estimation
181
+
182
+ return total_cost
183
+
184
+ # ====================================================
185
+ def get_model_price(self, model):
186
+ """
187
+ getting the input price and output price per 1m token for the requested model
188
+ :param model:
189
+ :return:
190
+ """
191
+
192
+ input_prices = {
193
+ "gpt-4o-mini":0.15,
194
+ "gpt-4.1-mini":0.4,
195
+ "gpt-5-mini": 0.25,
196
+ "gpt-5-nano": 0.05,
197
+ "gemini-2.5-flash":0.3,
198
+ "gemini-2.0-flash":0.1,
199
+ "gemini-2.5-flash-lite":0.1
200
+ }
201
+
202
+ out_prices = {
203
+ "gpt-4o-mini":0.6,
204
+ "gpt-4.1-mini":1.6,
205
+ "gpt-5-mini": 2,
206
+ "gpt-5-nano": 0.4,
207
+ "gemini-2.5-flash":2.5,
208
+ "gemini-2.0-flash":0.7,
209
+ "gemini-2.5-flash-lite":0.4
210
+ }
211
+
212
+ i_price = input_prices.get(model, 0)
213
+ o_price= out_prices.get(model, 0)
214
+
215
+ return i_price, o_price
216
+
217
+ # =====================================================
218
+ def identify_users(self, users_df, identifier_column):
219
+ """
220
+ specifying the users for identification
221
+ :param identifier_column:
222
+ :return: updated users
223
+ """
224
+
225
+ if identifier_column.upper() == "EMAIL":
226
+ return users_df
227
+ else:
228
+ users_df.rename(columns={identifier_column: "USER_ID"}, inplace=True)
229
+ return users_df
230
+
231
+ # ------------------------------------------------------------------
232
+ def _create_personalized_message(self, CoreConfig):
233
+ """
234
+ main function of the class to flow the work between functions inorder to create personalized messages.
235
+ :return: updated users_df with extracted information and personalize messages.
236
+ """
237
+ # Collecting all the data that we need to personalize messages
238
+ datacollect = DataCollector(CoreConfig)
239
+ CoreConfig = datacollect.gather_data()
240
+
241
+ if len(CoreConfig.users_df) == 0:
242
+ print("No valid user at the moment")
243
+ return None
244
+
245
+ else:
246
+
247
+ # generating recommendations for users, if we want to include recommendations in the message
248
+ if CoreConfig.involve_recsys_result and CoreConfig.messaging_mode != "message":
249
+ # We use random recommender, which means we pick a content randomly from their top list
250
+ # This approach will simply the process and we have unique recommendation for every single user.
251
+ Recommender = LLMR(CoreConfig, random=True)
252
+ CoreConfig = Recommender.get_recommendations()
253
+
254
+ else:
255
+ # We only want to generate the message and redirect them to For You section or Homepage
256
+ Recommender = DefaultRec(CoreConfig)
257
+ CoreConfig = Recommender.get_recommendations()
258
+
259
+ # Initialize message generator
260
+ message_generator = MessageGenerator(CoreConfig)
261
+
262
+ # Check if using agentic workflow
263
+ if message_generator.use_agentic_workflow:
264
+ # Agentic workflow: GeneratorAgent handles prompt generation internally
265
+ logger.info("Using agentic workflow for message generation")
266
+ else:
267
+ # Legacy workflow: Generate prompts separately
268
+ prompt = PromptGenerator(CoreConfig)
269
+ CoreConfig = prompt.generate_prompts()
270
+
271
+ # generating messages for each user
272
+ CoreConfig = message_generator.generate_messages()
273
+
274
+ # Eliminating rows where we don't have a valid message (null, empty, or whitespace only)
275
+ CoreConfig.users_df = CoreConfig.users_df[CoreConfig.users_df["message"].str.strip().astype(bool)]
276
+ # CoreConfig.checkpoint()
277
+
278
+ return CoreConfig.users_df
279
+
280
+ def _create_followup_personalized_message(self, CoreConfig, stage, test_mode, mode="production", ui_experiment_id=None):
281
+ """
282
+ Generate follow up messages for users
283
+ :param CoreConfig:
284
+ :param stage:
285
+ :param test_mode:
286
+ :param mode: operating mode - "production", "test", or "ui"
287
+ :param ui_experiment_id: Optional experiment ID for UI mode
288
+ :return:
289
+ """
290
+ # Read data from previous runs
291
+ datacollect = DataCollector(CoreConfig)
292
+ CoreConfig = datacollect.fetch_log_data(stage, test_mode, mode, ui_experiment_id)
293
+
294
+ if CoreConfig is None or len(CoreConfig.users_df) == 0:
295
+ print("No valid user at the moment")
296
+ return None
297
+ else:
298
+ # generating recommendations for users, if we want to include recommendations in the message
299
+ if CoreConfig.involve_recsys_result and CoreConfig.messaging_mode != "message":
300
+ # We use random recommender, which means we pick a content randomly from their top list
301
+ # This approach will simply the process and we have unique recommendation for every single user.
302
+ Recommender = LLMR(CoreConfig, random=True)
303
+ CoreConfig = Recommender.get_followup_recommendation()
304
+
305
+ else:
306
+ # We only want to generate the message and redirect them to For You section or Homepage
307
+ Recommender = DefaultRec(CoreConfig)
308
+ CoreConfig = Recommender.get_recommendations()
309
+
310
+ # Initialize message generator
311
+ message_generator = MessageGenerator(CoreConfig)
312
+
313
+ # Check if using agentic workflow
314
+ if message_generator.use_agentic_workflow:
315
+ # Agentic workflow: GeneratorAgent handles prompt generation internally
316
+ logger.info("Using agentic workflow for follow-up message generation")
317
+ else:
318
+ # Legacy workflow: Generate prompts separately
319
+ prompt = PromptGenerator(CoreConfig)
320
+ CoreConfig = prompt.generate_prompts(stage=stage)
321
+
322
+ # generating messages for each user
323
+ CoreConfig = message_generator.generate_messages(step=stage)
324
+
325
+ # Eliminating rows where we don't have a valid message (null, empty, or whitespace only)
326
+ CoreConfig.users_df = CoreConfig.users_df[CoreConfig.users_df["message"].str.strip().astype(bool)]
327
+ # CoreConfig.checkpoint()
328
+
329
+ return CoreConfig.users_df
330
+
331
+ # ======================= UI MODE HELPER FUNCTIONS =======================
332
+
333
+ def _store_to_csv_ui_mode(self, messages_df: pd.DataFrame, cost_df: pd.DataFrame, ui_experiment_id: str = None):
334
+ """
335
+ Store messages and cost data to local CSV files in UI mode.
336
+
337
+ This function appends data to a single CSV file that grows with each stage,
338
+ similar to how Snowflake stores all stages in one table.
339
+ Uses UTF-8 encoding to properly support emojis and special characters.
340
+
341
+ Args:
342
+ messages_df: DataFrame containing generated messages
343
+ cost_df: DataFrame containing cost information
344
+ ui_experiment_id: Optional experiment ID for naming files (e.g., 'messages_a_drumeo_20260111_1756')
345
+ """
346
+ # Ensure output directory exists
347
+ self.UI_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
348
+
349
+ # Define output file paths
350
+ # Use experiment ID if provided (for AB testing), otherwise use default filename
351
+ if ui_experiment_id:
352
+ messages_file = self.UI_OUTPUT_DIR / f"{ui_experiment_id}.csv"
353
+ else:
354
+ messages_file = self.UI_OUTPUT_DIR / self.UI_OUTPUT_FILE
355
+ cost_file = self.UI_OUTPUT_DIR / "message_cost.csv"
356
+
357
+ try:
358
+ # Store messages (append mode for multi-stage campaigns)
359
+ # Use UTF-8-SIG encoding to support emojis with BOM for better compatibility
360
+ if messages_file.exists():
361
+ # Append to existing file (use utf-8 for append to avoid multiple BOMs)
362
+ messages_df.to_csv(messages_file, mode='a', header=False, index=False, encoding='utf-8')
363
+ logger.info(f"Appended {len(messages_df)} messages to {messages_file}")
364
+ else:
365
+ # Create new file with header and BOM
366
+ messages_df.to_csv(messages_file, mode='w', header=True, index=False, encoding='utf-8-sig')
367
+ logger.info(f"Created new messages file with {len(messages_df)} messages at {messages_file}")
368
+
369
+ # Store cost data (append mode)
370
+ if cost_file.exists():
371
+ cost_df.to_csv(cost_file, mode='a', header=False, index=False, encoding='utf-8')
372
+ else:
373
+ cost_df.to_csv(cost_file, mode='w', header=True, index=False, encoding='utf-8-sig')
374
+
375
+ print(f"✅ UI Mode: Stored {len(messages_df)} messages to {messages_file}")
376
+ logger.info(f"UI Mode: Successfully stored messages and cost data locally")
377
+
378
+ except Exception as e:
379
+ logger.error(f"Error storing data in UI mode: {str(e)}")
380
+ print(f"❌ Error storing data in UI mode: {str(e)}")
381
+ raise
382
+
383
+ @classmethod
384
+ def get_ui_output_path(cls):
385
+ """
386
+ Get the path to UI output directory.
387
+
388
+ Returns:
389
+ Path: Path to UI output directory
390
+ """
391
+ return cls.UI_OUTPUT_DIR
392
+
393
+ @classmethod
394
+ def clear_ui_output(cls):
395
+ """
396
+ Clear all files in the UI output directory.
397
+
398
+ This should be called when starting a new UI run with fresh inputs.
399
+ """
400
+ if cls.UI_OUTPUT_DIR.exists():
401
+ for file in cls.UI_OUTPUT_DIR.glob("*.csv"):
402
+ try:
403
+ file.unlink()
404
+ logger.info(f"Deleted {file.name} from UI output directory")
405
+ except Exception as e:
406
+ logger.warning(f"Could not delete {file.name}: {str(e)}")
407
+ print(f"🧹 Cleared UI output directory: {cls.UI_OUTPUT_DIR}")
408
+ else:
409
+ print(f"ℹ️ UI output directory does not exist yet: {cls.UI_OUTPUT_DIR}")
410
+
411
+
412
+
{Messaging_system → ai_messaging_system_v2/Messaging_system}/PromptGenerator.py RENAMED
@@ -3,7 +3,6 @@ THis class generate proper prompts for the messaging system
3
  """
4
  import pandas as pd
5
  from tqdm import tqdm
6
- from Messaging_system.PromptEng import PromptEngine
7
 
8
 
9
  class PromptGenerator:
@@ -13,7 +12,7 @@ class PromptGenerator:
13
 
14
  # --------------------------------------------------------------
15
  # --------------------------------------------------------------
16
- def generate_prompts(self):
17
  """
18
  generates a personalized message for each student
19
  :return:
@@ -25,9 +24,11 @@ class PromptGenerator:
25
  # if we have personalized information about them, we generate a personalized prompt
26
  for idx, row in tqdm(self.Core.users_df.iterrows(), desc="generating prompts"):
27
  # check if we have enough information to generate a personalized message
28
- prompt = self.generate_personalized_prompt(user=row)
29
- # new_prompt = engine.prompt_engineering(prompt)
30
- # self.Core.users_df.at[idx, "prompt"] = new_prompt
 
 
31
  self.Core.users_df.at[idx, "prompt"] = prompt
32
  self.Core.users_df.at[idx, "source"] = "AI-generated"
33
 
@@ -40,21 +41,27 @@ class PromptGenerator:
40
 
41
  # ==============================================================
42
  def get_user_profile(self, user):
43
-
 
 
 
 
44
  if self.Core.personalization:
45
  user_info = f"""
46
 
47
- ### **Use below information from the user to make the final output more personalized if applicable.
48
- Use these info only to *flavour* the header and message if they add value. Never add new sentences just to stuff profile data.
49
 
50
  - The user is a {str(self.Core.get_instrument())} student.
51
  - {self.safe_get(self.Core.segment_info)}
52
- - Experience level (0-9): {self.safe_get(user.get("difficulty"))}
53
- - User profile --> use **indirectly** if it can improve personalization and **do not** use their preferences keywords ("genre, styles") directly in the message:
54
  {self.safe_get(user.get("user_info"))}
55
  """
56
- # eliminated: - first name: {self.safe_get(user.get("first_name"))} --> Only use if is available and the first name is a **valid name**
57
- return user_info
 
 
 
58
 
59
  else:
60
 
@@ -79,17 +86,56 @@ Here is the information about the user:
79
  #
80
  # {self.safe_get(user.get("user_info"))}
81
 
 
82
 
83
  # --------------------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  def generate_personalized_prompt(self, user):
85
  """
86
  generate a personalized prompt by putting the information from the user into a template prompt
87
  :return: Personalized prompt (string)
88
  """
89
  input_context = self.input_context()
90
- instructions = self.message_type_instructions()
91
  user_info = self.get_user_profile(user=user)
92
 
 
 
 
93
  recommendation_instructions = self.recommendations_instructions(user)
94
 
95
  example_output = self.example_output()
@@ -99,13 +145,13 @@ Here is the information about the user:
99
  prompt = f"""
100
  {input_context}
101
 
102
- {instructions}
103
 
104
  {user_info}
105
 
106
- {recommendation_instructions}
107
 
108
- {example_output}
109
 
110
  {output_instructions}
111
  """
@@ -121,16 +167,9 @@ Here is the information about the user:
121
 
122
  if self.Core.personalization:
123
  context = f"""
124
- You’re writing a push notification for a {self.Core.get_instrument()} student.
125
-
126
- **What to do**
127
- • Produce two fields: "header" and "message".
128
- • Keep the header < {self.Core.config_file["header_limit"]} chars and the message < {self.Core.config_file["message_limit"]}chars.
129
-
130
- **Voice & Style**
131
- • Sounds like a friend texting a tip.
132
- • No hype, no sales language, no “AI‑speak.”
133
- • If you add an emoji, use {self.Core.get_emoji()} exactly once, at the *end* of either the header *or* message (never both).
134
  """
135
 
136
  else:
@@ -151,6 +190,7 @@ DO NOT **change** or **modify** or **add to** the selected option in any shape o
151
  :return:
152
  """
153
 
 
154
  instructions_for_recsys = f"""
155
  ### ** Recommendation Personalization Guidelines **
156
 
@@ -159,27 +199,31 @@ Below is the content we want to recommend to the user:
159
  → Recommended Content Details:
160
  {user["recommendation_info"]}
161
 
162
- When incorporating this content into the message, follow these guidelines to keep the message friendly, relevant, and casual (not too scripted):
163
 
164
  1. **Title Usage**:
165
- - Refer to the **CONTENT_TITLE** naturally in the message — paraphrase or describe it, but do *not* quote it or use it verbatim.
166
  - Avoid making it feel like a promotion; frame it as something that *might interest* or *help* the user.
167
 
168
  2. **Content Type Context**:
169
- - Mention the **CONTENT_TYPE** (e.g., course, workout, quicktip) only if it flows naturally in the message.
170
 
171
 
172
  3. **Artist/Instructor Name**:
173
  - If the full name of the **ARTIST** is available, mention it casually if appropriate (e.g., "led by Jordan Mitchell").
174
  - If only the first name is known, do *not* include it in the message at all.
 
175
 
176
  4. **Tone & Style**:
177
  - Keep the tone light, supportive, and personal — like a helpful suggestion from a friend.
178
- - Avoid sounding pushy or overly promotional. Think friendly nudge, not marketing pitch.
179
 
180
  5. **Flexibility**:
181
  - You don’t need to include all elements every time. Prioritize what feels most relevant and natural based on the context.
182
 
 
 
 
183
  Goal: Make the recommendation feel personalized and casually relevant — not generic or copy-pasted.
184
  """
185
 
@@ -195,22 +239,22 @@ Goal: Make the recommendation feel personalized and casually relevant — not ge
195
  :return: output instructions as a string
196
  """
197
 
 
 
 
 
198
  instructions = f"""
199
-
200
  ### **Output instructions**:
 
201
 
202
  **Expected output structure:**
203
 
204
- thoughts:
205
- 1. Generate a header less than {self.Core.config_file["header_limit"]} characters.
206
- 2. Generate a message less than {self.Core.config_file["message_limit"]} characters.
207
- 3. Ensure that the output is a valid JSON following below structure.
208
-
209
  {{
210
- "header": "final header",
211
- "message": "final message",
212
  }}
213
 
 
214
  """
215
 
216
  return instructions
@@ -230,66 +274,52 @@ thoughts:
230
  else:
231
  # one shot prompting
232
  example = f"""
233
- Below are good examples to follow our voice, tune, structure and characteristic. mimic our style to create the header and message for the push notification:
234
 
235
- ### **Good examples:**
236
  {self.Core.sample_example}
237
  """
238
 
239
  return example
240
 
241
- def message_type_instructions(self):
 
242
  """
243
- create a proper instruction for the message type, regarding the input platform
244
- :return: message instructions as a string
 
 
245
  """
 
 
 
246
 
247
- instructions = ""
248
- message_style = self.message_instructions()
249
 
250
- if self.Core.platform == "push":
251
- instructions = f"""
252
- ### ** General Instructions: **
253
 
254
- - Don’t repeat words from header in the message if you can avoid it., and make sure there is no grammar problem.
255
- - Keep sentences short and spoken‑word friendly.
256
- - The message, vocabulary and sentences **MUST** sound like a natural conversation: something that people normally say in daily conversations.
257
- - No exclamation marks in the header; one is OK in the message if it feels natural.
258
 
259
- {message_style}
260
- """
261
 
 
 
262
 
263
- elif self.Core.platform == "app":
264
- instructions = f"""
265
- Message Specifications:
266
- - The message is an **in app notification**.
267
- - ** Keep the First sentence as "header" that should be a short personalized eye catching sentence less than 40 character **.
268
- - ** For the "header", don't use exclamation mark at the end, instead, use a space following with a proper emoji at the end of the "header" (e.g. Great work John 😍) **
269
- - **Keep the message concise and straightforward**.
270
- - **Start directly with the message content**; do not include greetings (e.g., "Hello") or closing phrases.
271
- - Make the message highly **personalized** and **eye-catching**.
272
- - "Personalized" means the user should feel the message is specifically crafted for them and not generic.
273
- - **Every word should contribute to maximizing impact and engagement**.
274
- - {message_style}
275
- """
276
 
277
- return instructions
278
 
279
- # =================================================
280
- def message_instructions(self):
281
- """
282
- defines the style of the message: e.g. friendly, kind, tone, etc.
283
- :return: style_instructions(str)
284
- """
285
 
286
- if self.Core.message_style is None or self.Core.message_style != "":
287
- message_style = ""
288
 
289
- else:
290
- message_style = f"""
291
- ** Important instructions**
292
- - {self.Core.message_style}.
293
- """
294
 
295
- return message_style
 
3
  """
4
  import pandas as pd
5
  from tqdm import tqdm
 
6
 
7
 
8
  class PromptGenerator:
 
12
 
13
  # --------------------------------------------------------------
14
  # --------------------------------------------------------------
15
+ def generate_prompts(self, stage=1):
16
  """
17
  generates a personalized message for each student
18
  :return:
 
24
  # if we have personalized information about them, we generate a personalized prompt
25
  for idx, row in tqdm(self.Core.users_df.iterrows(), desc="generating prompts"):
26
  # check if we have enough information to generate a personalized message
27
+ if stage == 1:
28
+ prompt = self.generate_personalized_prompt(user=row)
29
+ else:
30
+ prompt = self.generate_follow_up_prompt(user=row)
31
+
32
  self.Core.users_df.at[idx, "prompt"] = prompt
33
  self.Core.users_df.at[idx, "source"] = "AI-generated"
34
 
 
41
 
42
  # ==============================================================
43
  def get_user_profile(self, user):
44
+ """
45
+ getting personalized information about users (e.g. preferences)
46
+ :param user:
47
+ :return:
48
+ """
49
  if self.Core.personalization:
50
  user_info = f"""
51
 
52
+ ### **Use below information from the user to **modify** the selected header and message (without any change in style, tune and content) and make the final output more personalized if applicable.
53
+ Use these only to *flavour* the existing header and message. Never add new sentences just to stuff profile data.
54
 
55
  - The user is a {str(self.Core.get_instrument())} student.
56
  - {self.safe_get(self.Core.segment_info)}
57
+ - User profile --> Only use **indirectly** if it can improve personalization (Don't use their preferred genre or styles directly in the message:
 
58
  {self.safe_get(user.get("user_info"))}
59
  """
60
+ # birth_day_instructions = self.birth_day_instructions(user)
61
+ # if birth_day_instructions is not None:
62
+ # user_info += "\n" + birth_day_instructions
63
+ ## eliminate:
64
+ # - first name: {self.safe_get(user.get("first_name"))} --> Only use if is available and the first name is a **valid name**
65
 
66
  else:
67
 
 
86
  #
87
  # {self.safe_get(user.get("user_info"))}
88
 
89
+ return user_info
90
 
91
  # --------------------------------------------------------------
92
+ def get_additional_instructions(self):
93
+ """
94
+ Generates additional instructions section from campaign-wide and per-message instructions.
95
+ Injected after user profile section as specified.
96
+
97
+ :return: Formatted instructions string or empty string if no instructions
98
+ """
99
+ instructions_parts = []
100
+
101
+ # Add campaign-wide instructions if available
102
+ if self.Core.campaign_instructions:
103
+ instructions_parts.append(f"""### **Campaign Instructions**
104
+ {self.Core.campaign_instructions}""")
105
+
106
+ # Add per-message (stage-specific) instructions if available
107
+ if self.Core.per_message_instructions:
108
+ instructions_parts.append(f"""### **Additional Instructions for This Message**
109
+ {self.Core.per_message_instructions}""")
110
+
111
+ # Combine all instructions
112
+ if instructions_parts:
113
+ return "\n\n".join(instructions_parts)
114
+ else:
115
+ return ""
116
+
117
+ # --------------------------------------------------------------
118
+ def birth_day_instructions(self, user):
119
+ # Birthday reminder
120
+ if pd.notna(user["birthday_reminder"]) and user["birthday_reminder"] not in [None, [], {}]:
121
+ instructions = f"""
122
+ - **Include a short message to remind them that their birthday is coming up.**: {str(user["birthday_reminder"])} Days until their birthday.
123
+ """
124
+ return instructions
125
+ else:
126
+ return None
127
+
128
  def generate_personalized_prompt(self, user):
129
  """
130
  generate a personalized prompt by putting the information from the user into a template prompt
131
  :return: Personalized prompt (string)
132
  """
133
  input_context = self.input_context()
 
134
  user_info = self.get_user_profile(user=user)
135
 
136
+ # Gather additional instructions (campaign-wide + per-message)
137
+ additional_instructions = self.get_additional_instructions()
138
+
139
  recommendation_instructions = self.recommendations_instructions(user)
140
 
141
  example_output = self.example_output()
 
145
  prompt = f"""
146
  {input_context}
147
 
148
+ {example_output}
149
 
150
  {user_info}
151
 
152
+ {additional_instructions}
153
 
154
+ {recommendation_instructions}
155
 
156
  {output_instructions}
157
  """
 
167
 
168
  if self.Core.personalization:
169
  context = f"""
170
+ Your task is to select the best 'header' and a 'message' for a {self.Core.get_instrument()} student as a push notification.
171
+ Based on the user instructions, you might need to **modify the selected option** very minimal and slightly to improve personalization if capable.
172
+ **Important Note**: header < {self.Core.config_file["header_limit"]} and message < {self.Core.config_file["message_limit"]} characters.
 
 
 
 
 
 
 
173
  """
174
 
175
  else:
 
190
  :return:
191
  """
192
 
193
+
194
  instructions_for_recsys = f"""
195
  ### ** Recommendation Personalization Guidelines **
196
 
 
199
  → Recommended Content Details:
200
  {user["recommendation_info"]}
201
 
202
+ When incorporating this content into the message and header, follow these guidelines to keep the header and message friendly, relevant, and casual (not too scripted):
203
 
204
  1. **Title Usage**:
205
+ - Refer to the **CONTENT_TITLE** or content details naturally in the message — paraphrase or describe it, but do *not* quote it or use it verbatim.
206
  - Avoid making it feel like a promotion; frame it as something that *might interest* or *help* the user.
207
 
208
  2. **Content Type Context**:
209
+ - Mention the **CONTENT_TYPE** (e.g., course, workout) only if it flows naturally in the message.
210
 
211
 
212
  3. **Artist/Instructor Name**:
213
  - If the full name of the **ARTIST** is available, mention it casually if appropriate (e.g., "led by Jordan Mitchell").
214
  - If only the first name is known, do *not* include it in the message at all.
215
+ - **DO NOT ASSUME or HALLUCINATE Artist name based on previous messages**, only refer to Recommended Content Details provided.
216
 
217
  4. **Tone & Style**:
218
  - Keep the tone light, supportive, and personal — like a helpful suggestion from a friend.
219
+ - Avoid sounding pushy, overly promotional, or marketing pitch.
220
 
221
  5. **Flexibility**:
222
  - You don’t need to include all elements every time. Prioritize what feels most relevant and natural based on the context.
223
 
224
+ 6. **Time Reference**:
225
+ - NEVER use time-related words (“new,” “recent,” “latest,” etc.) and Never imply recency of the content in any way.
226
+
227
  Goal: Make the recommendation feel personalized and casually relevant — not generic or copy-pasted.
228
  """
229
 
 
239
  :return: output instructions as a string
240
  """
241
 
242
+ general_instructions = f"""
243
+ - Ensure that the output is a valid JSON following above structure.
244
+ """
245
+
246
  instructions = f"""
 
247
  ### **Output instructions**:
248
+ - header < {self.Core.config_file["header_limit"]} and message < {self.Core.config_file["message_limit"]} characters.
249
 
250
  **Expected output structure:**
251
 
 
 
 
 
 
252
  {{
253
+ "header": "final header considering instructions",
254
+ "message": "final message considering instructions",
255
  }}
256
 
257
+ {general_instructions}
258
  """
259
 
260
  return instructions
 
274
  else:
275
  # one shot prompting
276
  example = f"""
277
+ Below are the available options to select the header and message for the push notification:
278
 
279
+ ### **Available options:**
280
  {self.Core.sample_example}
281
  """
282
 
283
  return example
284
 
285
+ # =============================================================
286
+ def generate_follow_up_prompt(self, user):
287
  """
288
+ Creates a prompt to feed to the LLM, incorporating the previously generated messages.
289
+
290
+ :param user: User row; its "previous_messages" field holds the prior headers and messages.
291
+ :return: A user-facing prompt string instructing the model to produce a new message.
292
  """
293
+ previous_text_str = str(user["previous_messages"])
294
+ user_info = self.get_user_profile(user=user)
295
+ input_context = self.input_context()
296
 
297
+ # Gather additional instructions (campaign-wide + per-message)
298
+ additional_instructions = self.get_additional_instructions()
299
 
300
+ recommendation_instructions = self.recommendations_instructions(user)
301
+ output_instructions = self.output_instruction()
302
+ examples = self.example_output()
303
 
304
+ # Craft the prompt
305
+ prompt = f"""
306
+ We have previously sent these push notifications to the user and The user has not re-engaged yet:
 
307
 
308
+ ** Previous messages **
309
+ {previous_text_str}
310
 
311
+ {input_context}
312
+ - The new selection should be different from previous headers and messages and we should not have similar words and phrases from previous sends.
313
 
314
+ {examples}
 
 
 
 
 
 
 
 
 
 
 
 
315
 
316
+ {user_info}
317
 
318
+ {additional_instructions}
 
 
 
 
 
319
 
320
+ {recommendation_instructions}
 
321
 
322
+ {output_instructions}
323
+ """
 
 
 
324
 
325
+ return prompt
ai_messaging_system_v2/Messaging_system/agents/README.md ADDED
@@ -0,0 +1,518 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Agentic Workflow System
2
+
3
+ ## Overview
4
+
5
+ The Agentic Workflow System is a multi-agent architecture designed to enhance the quality and accuracy of personalized push notification messages. It replaces the single-step LLM generation with a sophisticated two-agent system that includes generation, validation, and iterative refinement.
6
+
7
+ ## Architecture
8
+
9
+ ### System Components
10
+
11
+ ```
12
+ ┌─────────────────────────────────────────────────────────────┐
13
+ │ AgentOrchestrator │
14
+ │ (Manages workflow, feedback loops, rejection logging) │
15
+ └───────┬─────────────────────────────────────────┬───────────┘
16
+ │ │
17
+ ▼ ▼
18
+ ┌──────────────────┐ ┌──────────────────┐
19
+ │ GeneratorAgent │◄───── Feedback ────│ SecurityAgent │
20
+ │ │ │ │
21
+ │ - Prompt Gen │ │ - Rule-based │
22
+ │ - LLM Call │ │ - LLM-based │
23
+ │ - Instructions │ │ - Validation │
24
+ └──────────────────┘ └──────────────────┘
25
+ │ │
26
+ └─────────► Message ─────────► │
27
+
28
+ ┌─────────▼─────────┐
29
+ │ RejectionLogger │
30
+ │ (CSV Logs) │
31
+ └───────────────────┘
32
+ ```
33
+
34
+ ### Key Features
35
+
36
+ 1. **Multi-Agent System**: Separates generation from validation for improved quality control
37
+ 2. **Feedback Loop**: Up to 3 attempts with detailed feedback for regeneration
38
+ 3. **Rule-Based + LLM Validation**: Fast rule-based checks followed by focused LLM validation (instruction adherence, content accuracy, authenticity)
39
+ 4. **Rejection Logging**: Tracks all rejections with detailed information for evaluation
40
+ 5. **Scalable Architecture**: Easy to add new agents and validation rules
41
+ 6. **Enhanced Prompt Engineering**: Different strategies for instructions vs. examples
42
+ 7. **Conditional Prompts**: Support for dynamic prompt injection (e.g., birthday reminders)
43
+ 8. **Smart LLM Integration**: Multi-mode validation system with automatic retries and fallback strategies
44
+
45
+ ### LLM Integration & Validation Modes
46
+
47
+ The agentic system integrates seamlessly with the existing `LLM.py` infrastructure through a sophisticated validation mode system:
48
+
49
+ **Three Validation Modes**:
50
+ 1. **`message_generation`** (for GeneratorAgent):
51
+ - Validates presence of `header` and `message` keys
52
+ - Enforces character limits (header < 30, message < 110)
53
+ - Automatically retries up to 6 times (Google) or 5 times (OpenAI)
54
+ - Provides detailed error messages for each retry
55
+
56
+ 2. **`validation_response`** (for SecurityAgent):
57
+ - Validates presence of `approved` key
58
+ - Expects response format: `{"approved": true/false, "issues": [...], "feedback": "..."}`
59
+ - Used for LLM-based quality validation
60
+
61
+ 3. **`generic_json`** (for future agents):
62
+ - Only validates JSON syntax
63
+ - No specific key requirements
64
+ - Flexible for custom agent responses
65
+
66
+ **Benefits**:
67
+ - Rule-based validation happens **before** LLM calls, catching most issues early
68
+ - Each agent uses the correct validation for its response format
69
+ - No false rejections due to mismatched validation expectations
70
+ - Automatic retry logic with detailed feedback reduces waste
71
+
72
+ ## Agents
73
+
74
+ ### 1. BaseAgent (Abstract Class)
75
+
76
+ The foundation for all agents, providing common interface and utilities.
77
+
78
+ **Location**: `base_agent.py`
79
+
80
+ **Key Methods**:
81
+ - `execute(context)`: Abstract method that all agents must implement
82
+ - `log_info/warning/error()`: Logging utilities
83
+ - `validate_context()`: Context validation helper
84
+
85
+ **Purpose**: Ensures consistency across agents and makes it easy to add new agents.
86
+
87
+ ### 2. GeneratorAgent
88
+
89
+ Generates personalized messages using cutting-edge prompt engineering.
90
+
91
+ **Location**: `generator_agent.py`
92
+
93
+ **Responsibilities**:
94
+ - Generate prompts with adaptive strategies based on instructions/examples
95
+ - Call LLM to create headers and messages
96
+ - Handle feedback from SecurityAgent for regeneration
97
+ - Support conditional prompt injection (birthday reminders, etc.)
98
+
99
+ **Prompt Engineering Strategies**:
100
+
101
+ | Scenario | Strategy |
102
+ |----------|----------|
103
+ | Instructions + Examples | Prioritize instructions, use examples for style/voice |
104
+ | Only Examples | Analyze examples to understand brand vocabulary, style, voice; create personalized message matching that style |
105
+ | Only Instructions | Follow instructions without example reference |
106
+ | Neither | Basic prompt with user data |
107
+
108
+ **Key Features**:
109
+ - Handles both initial messages (stage 1) and follow-up messages (stages 2-11)
110
+ - Checks previous messages to avoid repetition
111
+ - Integrates user profile, recommendations, and conditional prompts
112
+ - Enforces character limits and banned phrase avoidance
113
+
114
+ ### 3. SecurityAgent
115
+
116
+ Validates generated messages as a quality firewall.
117
+
118
+ **Location**: `security_agent.py`
119
+
120
+ **Responsibilities**:
121
+ - Perform fast rule-based validation
122
+ - Conduct focused LLM-based validation (instruction adherence, content accuracy, authenticity)
123
+ - Provide detailed feedback for regeneration
124
+ - Approve or reject messages
125
+
126
+ **Validation Pipeline**:
127
+
128
+ ```
129
+ Input Message
130
+
131
+
132
+ ┌─────────────────────┐
133
+ │ Rule-Based Checks │ (Fast - <1ms)
134
+ │ - Character limits │
135
+ │ - Empty content │
136
+ │ - Banned phrases │
137
+ │ - Placeholders │
138
+ │ - Singeo phrases │
139
+ └──────┬──────────────┘
140
+
141
+ ▼ (If passes)
142
+ ┌─────────────────────┐
143
+ │ LLM-Based Checks │ (Focused & Concise)
144
+ │ - Instruction │
145
+ │ adherence │
146
+ │ - Content accuracy │
147
+ │ - Authenticity │
148
+ │ - Time words │
149
+ │ - Similarity │
150
+ └──────┬──────────────┘
151
+
152
+
153
+ Approved ✓
154
+ ```
155
+
156
+ **Validation Criteria**:
157
+
158
+ 1. **Rule-Based** (Fast - catches obvious issues before LLM):
159
+ - **Character limits**: header < 30, message < 110
160
+ - **Non-empty content**: Both header and message must have text
161
+ - **Banned phrases**: No AI jargon or brand-specific banned phrases
162
+ - **Placeholder detection**: No template variables like [user_name], {name}, {{var}}
163
+ - **Singeo-specific**: No "your instrument" phrases (vocals aren't instruments)
164
+
165
+ 2. **LLM-Based** (Context-aware validation):
166
+ - **Instruction adherence**: Follows provided campaign/message instructions
167
+ - **Content accuracy**: Correct artist/content names, no hallucinations
168
+ - **Authenticity**: Sounds human, not robotic
169
+ - **Time words**: Distinguishes between content recency (REJECT: "new course") vs action timing (OK: "practice today")
170
+ - **Similarity check**: For follow-ups, ensures message doesn't sound too similar to last 2 messages (focuses on structure/tone, not just words)
171
+ - Note: Be lenient - only reject OBVIOUS problems
172
+
173
+ **Recent Improvements (Jan 2026)**:
174
+
175
+ 1. **Placeholder Detection**:
176
+ - Catches template variables like [user_name], {name}, {{var}} before they reach users
177
+ - Prevents hallucination where LLM generates placeholder strings instead of actual content
178
+
179
+ 2. **Smart Time-Word Validation**:
180
+ - Moved from rigid rule-based to context-aware LLM validation
181
+ - Distinguishes "practice today" (action timing ✓) from "new course" (content recency ✗)
182
+ - Prevents false claims that recommended content is new
183
+
184
+ 3. **Singeo Brand Protection**:
185
+ - Special check for "your instrument" phrases when brand is Singeo
186
+ - Prevents awkward phrasing since vocals/singing are not instruments
187
+ - Uses natural language like "practice" or "continue learning"
188
+
189
+ 4. **Message Similarity Detection**:
190
+ - Compares against last 2 previous messages for follow-ups
191
+ - Focuses on overall impression/structure, not just word overlap
192
+ - Example: "Ready to sing?" vs "Ready to practice?" = too similar (same feel)
193
+ - Ensures fresh, varied messaging across campaign stages
194
+
195
+ 5. **Simplified Validation Prompts**:
196
+ - Reduced from ~80 lines to ~25 lines
197
+ - More concise, direct, and effective
198
+ - Faster processing with better clarity
199
+ - All validation criteria maintained
200
+
201
+ 6. **Brand-Specific Labeling Prevention**:
202
+ - Banned labels across all brands: "drummer", "guitarist", "pianist", "singer"
203
+ - Uses "learning {instrument}" phrasing instead of "{instrument} student"
204
+ - Natural, non-labeling language that encourages without pushing
205
+
206
+ ### 4. AgentOrchestrator
207
+
208
+ Manages the workflow between agents and handles feedback loops.
209
+
210
+ **Location**: `agent_orchestrator.py`
211
+
212
+ **Responsibilities**:
213
+ - Coordinate GeneratorAgent and SecurityAgent
214
+ - Implement feedback loop (max 3 attempts)
215
+ - Manage rejection logging
216
+ - Return approved message or None after exhausting attempts
217
+
218
+ **Workflow**:
219
+
220
+ ```
221
+ For each user:
222
+ For attempt in [1, 2, 3]:
223
+ 1. GeneratorAgent.execute()
224
+ ├─ Success? Continue
225
+ └─ Failure? Log & retry
226
+
227
+ 2. SecurityAgent.execute()
228
+ ├─ Approved? Return message
229
+ └─ Rejected? Log & provide feedback
230
+
231
+ 3. Use feedback for next attempt
232
+
233
+ If all attempts fail:
234
+ Return None (user gets no message)
235
+ ```
236
+
237
+ ### 5. RejectionLogger
238
+
239
+ Logs all rejections to CSV files for evaluation and optimization.
240
+
241
+ **Location**: `rejection_logger.py`
242
+
243
+ **CSV Format**:
244
+ - `timestamp`: When the rejection occurred
245
+ - `user_id`: User identifier
246
+ - `attempt_number`: Which attempt (1, 2, or 3)
247
+ - `rejection_reason`: Brief reason for rejection
248
+ - `validation_type`: rule_based or llm_based
249
+ - `detailed_feedback`: Detailed feedback for improvement
250
+ - `generated_header`: The rejected header
251
+ - `generated_message`: The rejected message
252
+ - `header_length`: Header character count
253
+ - `message_length`: Message character count
254
+ - `model_used`: LLM model that generated the message
255
+ - `prompt_excerpt`: First 200 chars of prompt
256
+ - `recommendation_info`: Content recommendation details
257
+ - `has_instructions`: Whether instructions were provided
258
+ - `has_examples`: Whether examples were provided
259
+
260
+ **File Naming**: `{brand}_{campaign_name}_stage{stage}_{timestamp}_rejections.csv`
261
+
262
+ **Storage Location**: `ai_messaging_system_v2/logs/rejections/`
263
+
264
+ **Use Cases**:
265
+ - Evaluate LLM performance
266
+ - Identify common rejection patterns
267
+ - Optimize prompts and instructions
268
+ - Track improvement over time
269
+ - A/B testing different strategies
270
+
271
+ ## Usage
272
+
273
+ ### Basic Usage
274
+
275
+ The agentic workflow is enabled by default in `MessageGenerator`:
276
+
277
+ ```python
278
+ from Messaging_system.Permes import Permes
279
+
280
+ permes = Permes()
281
+ users_df = permes.create_personalize_messages(
282
+ session=session,
283
+ users=users,
284
+ brand="drumeo",
285
+ config_file=system_config,
286
+ stage=1,
287
+ campaign_name="re_engagement",
288
+ campaign_instructions="Keep messages encouraging",
289
+ per_message_instructions="Focus on the recommended content"
290
+ )
291
+ ```
292
+
293
+ ### Switching to Legacy Mode
294
+
295
+ To disable the agentic workflow and use the legacy single-step generation:
296
+
297
+ ```python
298
+ from Messaging_system.Message_generator import MessageGenerator
299
+
300
+ message_generator = MessageGenerator(core_config)
301
+ message_generator.use_agentic_workflow = False # Disable agents
302
+ core_config = message_generator.generate_messages(step=1)
303
+ ```
304
+
305
+ ### Accessing Rejection Logs
306
+
307
+ After generation, rejection logs are automatically saved:
308
+
309
+ ```python
310
+ # Log location is printed in console output
311
+ # Example: ai_messaging_system_v2/logs/rejections/drumeo_re_engagement_stage1_20250130_143022_rejections.csv
312
+
313
+ import pandas as pd
314
+
315
+ # Read rejection log
316
+ rejections = pd.read_csv("path/to/rejection_log.csv")
317
+
318
+ # Analyze common rejection reasons
319
+ print(rejections["rejection_reason"].value_counts())
320
+
321
+ # Filter by validation type
322
+ rule_based = rejections[rejections["validation_type"] == "rule_based"]
323
+ llm_based = rejections[rejections["validation_type"] == "llm_based"]
324
+ ```
325
+
326
+ ## Configuration
327
+
328
+ ### System Configuration
329
+
330
+ Managed in `configs/system/system_config.py`:
331
+
332
+ ```python
333
+ SYSTEM_CONFIG = {
334
+ "header_limit": 30,
335
+ "message_limit": 110,
336
+ "AI_Jargon": ["elevate", "enhance", "ignite", ...],
337
+ "AI_phrases_drumeo": [...],
338
+ "AI_phrases_pianote": [...],
339
+ ...
340
+ }
341
+ ```
342
+
343
+ ### Campaign Configuration
344
+
345
+ Instructions can be set at two levels:
346
+
347
+ 1. **Campaign-Wide Instructions** (apply to all stages):
348
+ ```python
349
+ CAMPAIGNS = {
350
+ "re_engagement": {
351
+ "campaign_instructions": "Keep messages encouraging and upbeat",
352
+ ...
353
+ }
354
+ }
355
+ ```
356
+
357
+ 2. **Per-Message Instructions** (stage-specific):
358
+ ```python
359
+ "1": {
360
+ "stage": 1,
361
+ "instructions": "Focus on the recommended content",
362
+ ...
363
+ }
364
+ ```
365
+
366
+ ## Adding New Agents
367
+
368
+ To add a new agent to the system:
369
+
370
+ 1. **Create Agent Class**:
371
+ ```python
372
+ from .base_agent import BaseAgent
373
+
374
+ class MyNewAgent(BaseAgent):
375
+ def __init__(self, core_config):
376
+ super().__init__(name="MyNewAgent", core_config=core_config)
377
+
378
+ def execute(self, context):
379
+ # Implement agent logic
380
+ return {
381
+ "success": True,
382
+ "data": {...},
383
+ "error": None
384
+ }
385
+ ```
386
+
387
+ 2. **Update Agent Orchestrator**:
388
+ ```python
389
+ # In agent_orchestrator.py
390
+ self.my_new_agent = MyNewAgent(core_config)
391
+
392
+ # Add to workflow
393
+ result = self.my_new_agent.execute(context)
394
+ ```
395
+
396
+ 3. **Update `__init__.py`**:
397
+ ```python
398
+ from .my_new_agent import MyNewAgent
399
+
400
+ __all__ = [
401
+ ...
402
+ "MyNewAgent"
403
+ ]
404
+ ```
405
+
406
+ ## Performance Considerations
407
+
408
+ ### Parallel Processing
409
+
410
+ The agentic workflow maintains the parallel processing architecture:
411
+ - User chunks are processed in parallel at the top level
412
+ - Within each chunk, agents run sequentially for each user
413
+ - This balances throughput with quality control
414
+
415
+ ### Latency
416
+
417
+ - **Rule-based validation**: < 1ms per message
418
+ - **LLM-based validation**: ~1-2 seconds per message
419
+ - **Total per user** (with 1 attempt): ~2-4 seconds
420
+ - **Total per user** (with 3 attempts): ~6-12 seconds
421
+
422
+ ### Cost Optimization
423
+
424
+ - Rule-based checks eliminate most invalid messages before LLM validation
425
+ - Feedback loop reduces wasted generations
426
+ - Rejection logging helps identify and fix systematic issues
427
+
428
+ ## Monitoring & Debugging
429
+
430
+ ### Logging
431
+
432
+ All agents log their activities:
433
+
434
+ ```python
435
+ import logging
436
+ logger = logging.getLogger()
437
+ logger.setLevel(logging.INFO)
438
+ ```
439
+
440
+ ### Rejection Statistics
441
+
442
+ After each run, check rejection stats:
443
+
444
+ ```python
445
+ # Automatically logged at the end of generation
446
+ # Example output:
447
+ # Rejection stats: {
448
+ # 'total_rejections': 15,
449
+ # 'rule_based_rejections': 10,
450
+ # 'llm_based_rejections': 5,
451
+ # 'by_attempt': {1: 8, 2: 5, 3: 2},
452
+ # 'common_reasons': {
453
+ # 'Message exceeds limit': 6,
454
+ # 'Contains banned phrase': 4,
455
+ # ...
456
+ # }
457
+ # }
458
+ ```
459
+
460
+ ## Best Practices
461
+
462
+ 1. **Provide Clear Instructions**: The more specific your instructions, the better the results
463
+ 2. **Use Examples**: Examples help the agent understand your brand voice
464
+ 3. **Monitor Rejections**: Regularly review rejection logs to identify issues
465
+ 4. **Iterate on Prompts**: Use rejection feedback to improve campaign instructions
466
+ 5. **Test in Stages**: Test with small user samples before full campaigns
467
+ 6. **Balance Quality & Speed**: Consider whether 3 attempts is optimal for your use case
468
+
469
+ ## Troubleshooting
470
+
471
+ ### High Rejection Rate
472
+
473
+ - Check rejection logs for common patterns
474
+ - Review campaign and per-message instructions
475
+ - Ensure examples match your brand voice
476
+ - Verify character limits are achievable
477
+
478
+ ### LLM Validation Failures
479
+
480
+ - Check LLM API connectivity
481
+ - Review validation prompt logic in SecurityAgent
482
+ - Consider adjusting validation criteria
483
+
484
+ ### No Messages Generated
485
+
486
+ - Check if all attempts are being rejected
487
+ - Review rejection logs for the reason
488
+ - Verify user data quality
489
+ - Check for empty instructions or examples
490
+
491
+ ## Future Enhancements
492
+
493
+ Potential additions to the agentic system:
494
+
495
+ 1. **Performance Agent**: Analyzes engagement metrics and optimizes messages
496
+ 2. **A/B Testing Agent**: Generates variations for testing
497
+ 3. **Brand Consistency Agent**: Ensures messages align with brand guidelines
498
+ 4. **Personalization Scorer**: Rates personalization quality
499
+ 5. **Multi-Language Agent**: Handles translation and localization
500
+ 6. **Content Safety Agent**: Additional safety checks for sensitive content
501
+
502
+ ## Contributing
503
+
504
+ When modifying the agentic workflow:
505
+
506
+ 1. Maintain the BaseAgent interface for new agents
507
+ 2. Update this README with any changes
508
+ 3. Add tests for new agents
509
+ 4. Document new configuration options
510
+ 5. Update rejection logging schema if needed
511
+
512
+ ## License
513
+
514
+ Proprietary to Musora Media Inc.
515
+
516
+ ## Contact
517
+
518
+ For questions or contributions: [email protected]
ai_messaging_system_v2/Messaging_system/agents/__init__.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Agentic Workflow System for AI Messaging

This package contains the multi-agent system for generating and validating
personalized messages with enhanced quality control.
"""

from .base_agent import BaseAgent
from .generator_agent import GeneratorAgent
from .security_agent import SecurityAgent
from .agent_orchestrator import AgentOrchestrator
from .rejection_logger import RejectionLogger

# Public API of the agents package; keep in sync with the imports above.
__all__ = [
    "BaseAgent",
    "GeneratorAgent",
    "SecurityAgent",
    "AgentOrchestrator",
    "RejectionLogger"
]
ai_messaging_system_v2/Messaging_system/agents/agent_orchestrator.py ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Agent Orchestrator
3
+
4
+ Manages the agentic workflow between GeneratorAgent and SecurityAgent.
5
+ Handles feedback loops, retry logic, and rejection logging.
6
+ """
7
+
8
+ import json
9
+ from typing import Dict, Any, Optional
10
+ import logging
11
+ import pandas as pd
12
+ from .base_agent import BaseAgent
13
+ from .generator_agent import GeneratorAgent
14
+ from .security_agent import SecurityAgent
15
+ from .rejection_logger import RejectionLogger
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class AgentOrchestrator:
    """
    Orchestrates the agentic workflow for message generation and validation.

    Workflow:
    1. GeneratorAgent generates a message
    2. SecurityAgent validates the message
    3. If rejected, feedback is sent back to GeneratorAgent
    4. Process repeats up to 3 attempts
    5. All rejections are logged for evaluation
    """

    # Class-level logger: available even on instances created without __init__
    # (e.g. via __new__ in tests) and independent of module-level names.
    _logger = logging.getLogger(__name__)

    def __init__(self, core_config: Any, rejection_logger: Optional["RejectionLogger"] = None):
        """
        Initialize the Agent Orchestrator.

        Args:
            core_config: CoreConfig instance
            rejection_logger: Optional RejectionLogger instance
        """
        self.core_config = core_config
        self.rejection_logger = rejection_logger

        # Initialize agents
        self.generator_agent = GeneratorAgent(core_config)
        self.security_agent = SecurityAgent(core_config)

        # Maximum generate -> validate cycles per user before giving up.
        self.max_attempts = 3

    def generate_and_validate_message(self, user: pd.Series, stage: int) -> Optional[Dict[str, Any]]:
        """
        Generate and validate a message for a user with feedback loop.

        Args:
            user: User data (pandas Series)
            stage: Campaign stage number

        Returns:
            Approved message dict or None if all attempts failed:
            {
                "header": str,
                "message": str,
                "metadata": {
                    "attempts": int,
                    "model": str,
                    ...
                }
            }
        """
        user_id = user.get("user_id", user.get("USER_ID", "unknown"))
        feedback = None  # carries validator feedback into the next attempt

        for attempt in range(1, self.max_attempts + 1):
            self._logger.info(f"[User {user_id}] Attempt {attempt}/{self.max_attempts}")

            # Step 1: Generate message
            gen_context = {
                "user": user,
                "stage": stage,
                "attempt": attempt,
                "feedback": feedback
            }

            gen_result = self.generator_agent.execute(gen_context)

            if not gen_result["success"]:
                self._logger.warning(f"[User {user_id}] Generation failed on attempt {attempt}: {gen_result['error']}")

                # Log this as a generation failure
                if self.rejection_logger:
                    self.rejection_logger.log_rejection(
                        user_id=str(user_id),
                        attempt_number=attempt,
                        rejection_reason="Generation failed",
                        validation_type="generation_error",
                        detailed_feedback=gen_result.get("error", "Unknown error"),
                        generated_header="",
                        generated_message="",
                        model_used=self.core_config.model,
                        prompt=gen_context.get("prompt", ""),
                        recommendation_info=str(user.get("recommendation_info", "")),
                        has_instructions=bool(self.core_config.campaign_instructions or self.core_config.per_message_instructions),
                        has_examples=bool(self.core_config.sample_example)
                    )

                # If generation fails, try again
                continue

            # Extract generated content
            header = gen_result["data"]["header"]
            message = gen_result["data"]["message"]
            prompt = gen_result["data"].get("prompt", "")

            # Step 2: Validate message
            val_context = {
                "header": header,
                "message": message,
                "user": user,
                "prompt": prompt,
                "attempt": attempt
            }

            val_result = self.security_agent.execute(val_context)

            # BUG FIX: the validator itself can fail (success=False, data=None
            # per the BaseAgent result envelope). The previous code then
            # unconditionally indexed val_result["data"][...] and crashed with
            # a TypeError. Read the payload defensively so a validator error
            # is handled as a rejection instead of aborting the whole user.
            val_data = val_result.get("data") or {}

            # Check if approved
            if val_result["success"] and val_data.get("approved"):
                self._logger.info(f"[User {user_id}] Message approved on attempt {attempt}")

                # Return the approved message
                return {
                    "header": header,
                    "message": message,
                    "metadata": {
                        "attempts": attempt,
                        "model": self.core_config.model,
                        "stage": stage,
                        "approved": True
                    }
                }

            # Message was rejected (or the validator errored out).
            rejection_reason = val_data.get("rejection_reason", val_result.get("error") or "Validation failed")
            detailed_feedback = val_data.get("detailed_feedback", rejection_reason)
            validation_type = val_data.get("validation_type", "validation_error")

            self._logger.info(f"[User {user_id}] Message rejected on attempt {attempt}: {rejection_reason}")

            # Log the rejection
            if self.rejection_logger:
                self.rejection_logger.log_rejection(
                    user_id=str(user_id),
                    attempt_number=attempt,
                    rejection_reason=rejection_reason,
                    validation_type=validation_type,
                    detailed_feedback=detailed_feedback,
                    generated_header=header,
                    generated_message=message,
                    model_used=self.core_config.model,
                    prompt=prompt,
                    recommendation_info=str(user.get("recommendation_info", ""))[:200],
                    has_instructions=bool(self.core_config.campaign_instructions or self.core_config.per_message_instructions),
                    has_examples=bool(self.core_config.sample_example)
                )

            # Set feedback for next attempt
            if attempt < self.max_attempts:
                feedback = detailed_feedback
                self._logger.info(f"[User {user_id}] Retrying with feedback: {feedback}")
            else:
                self._logger.warning(f"[User {user_id}] Max attempts ({self.max_attempts}) reached. Giving up.")

        # All attempts failed
        self._logger.warning(f"[User {user_id}] Failed to generate valid message after {self.max_attempts} attempts")
        return None

    def process_batch(self, users_df: pd.DataFrame, stage: int) -> pd.DataFrame:
        """
        Process a batch of users through the agentic workflow.

        Args:
            users_df: DataFrame of users
            stage: Campaign stage number

        Returns:
            DataFrame with generated messages and metadata
        """
        self._logger.info(f"Processing batch of {len(users_df)} users through agentic workflow")

        results = []

        for idx, user in users_df.iterrows():
            result = self.generate_and_validate_message(user, stage)

            user_data = user.to_dict()
            if result:
                # Attach the approved message to the user row
                user_data["header"] = result["header"]
                user_data["message"] = result["message"]
                user_data["metadata"] = result["metadata"]
            else:
                # Failed to generate a valid message after all attempts
                user_data["header"] = None
                user_data["message"] = None
                user_data["metadata"] = {"attempts": self.max_attempts, "approved": False}
            results.append(user_data)

        # Convert results back to DataFrame
        results_df = pd.DataFrame(results)

        # Log summary
        successful = sum(1 for r in results if r["message"] is not None)
        self._logger.info(f"Batch processing complete: {successful}/{len(users_df)} messages generated successfully")

        return results_df

    def set_rejection_logger(self, rejection_logger: "RejectionLogger"):
        """Set the rejection logger."""
        self.rejection_logger = rejection_logger

    def get_stats(self) -> Dict[str, Any]:
        """
        Get statistics about the orchestration process.

        Returns:
            Dictionary with stats
        """
        stats = {}

        if self.rejection_logger:
            stats["rejection_stats"] = self.rejection_logger.get_rejection_stats()
            stats["rejection_log_path"] = str(self.rejection_logger.get_log_path())

        return stats
ai_messaging_system_v2/Messaging_system/agents/base_agent.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Base Agent Abstract Class
3
+
4
+ Provides the foundation for all agents in the agentic workflow system.
5
+ All agents must inherit from this class to ensure consistency and scalability.
6
+ """
7
+
8
+ from abc import ABC, abstractmethod
9
+ from typing import Dict, Any, Optional
10
+ import logging
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
class BaseAgent(ABC):
    """
    Abstract base class for all agents in the messaging system.

    Defines the common contract every agent implements, which keeps the
    workflow uniform and makes it straightforward to plug in new agents.
    """

    def __init__(self, name: str, core_config: Any):
        """
        Initialize the base agent.

        Args:
            name: The name of the agent (e.g., "GeneratorAgent", "SecurityAgent")
            core_config: CoreConfig instance containing system configuration
        """
        self.name = name
        self.core_config = core_config
        # Each agent gets its own child logger, namespaced by agent name.
        self.logger = logging.getLogger(f"{__name__}.{name}")

    @abstractmethod
    def execute(self, context: Dict[str, Any]) -> Dict[str, Any]:
        """
        Run the agent's main task.

        Args:
            context: Dictionary with everything the agent needs: user data,
                prompts, messages, feedback, etc.

        Returns:
            A result envelope of the form:
            {
                "success": bool,
                "data": Any,
                "error": Optional[str],
                "metadata": Optional[Dict]
            }
        """

    def _tagged(self, message: str) -> str:
        """Prefix *message* with this agent's name for log readability."""
        return f"[{self.name}] {message}"

    def log_info(self, message: str):
        """Log informational message."""
        self.logger.info(self._tagged(message))

    def log_warning(self, message: str):
        """Log warning message."""
        self.logger.warning(self._tagged(message))

    def log_error(self, message: str):
        """Log error message."""
        self.logger.error(self._tagged(message))

    def validate_context(self, context: Dict[str, Any], required_keys: list) -> bool:
        """
        Validate that the context contains all required keys.

        Args:
            context: The context dictionary to validate
            required_keys: List of required keys

        Returns:
            True if all required keys are present; False otherwise (the
            missing keys are reported at error level).
        """
        missing_keys = [key for key in required_keys if key not in context]
        if not missing_keys:
            return True
        self.log_error(f"Missing required keys in context: {missing_keys}")
        return False
ai_messaging_system_v2/Messaging_system/agents/generator_agent.py ADDED
@@ -0,0 +1,470 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Generator Agent
3
+
4
+ Handles prompt generation and message creation using cutting-edge prompt engineering.
5
+ Adapts prompts based on whether instructions and/or examples are provided.
6
+ """
7
+
8
+ import json
9
+ import pandas as pd
10
+ from typing import Dict, Any, Optional
11
+ from .base_agent import BaseAgent
12
+ from ..LLM import LLM
13
+
14
+
15
class GeneratorAgent(BaseAgent):
    """
    Generator Agent - Creates personalized messages with advanced prompt engineering.

    This agent:
    - Generates prompts with different strategies based on instructions vs examples
    - Creates personalized headers and messages using LLM
    - Handles feedback from SecurityAgent for regeneration attempts
    - Supports conditional prompt injection (e.g., birthday reminders)
    """

    def __init__(self, core_config: Any):
        """
        Initialize the Generator Agent.

        Args:
            core_config: CoreConfig instance
        """
        super().__init__(name="GeneratorAgent", core_config=core_config)
        # Shared LLM client used for every generation call made by this agent.
        self.llm = LLM(core_config)

    def execute(self, context: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute message generation.

        Args:
            context: {
                "user": pd.Series - user data row
                "stage": int - campaign stage
                "feedback": Optional[str] - feedback from previous rejection
                "attempt": int - current attempt number (1-3)
            }

        Returns:
            {
                "success": bool,
                "data": {
                    "header": str,
                    "message": str,
                    "prompt": str  # The prompt used
                },
                "error": Optional[str],
                "metadata": {
                    "tokens_used": int,
                    "model": str
                }
            }
        """
        # Validate required context
        if not self.validate_context(context, ["user", "stage", "attempt"]):
            return {
                "success": False,
                "data": None,
                "error": "Missing required context keys"
            }

        user = context["user"]
        stage = context["stage"]
        # "attempt" is guaranteed present by validate_context above; the
        # default here is only a belt-and-braces fallback.
        attempt = context.get("attempt", 1)
        feedback = context.get("feedback", None)

        try:
            # Generate prompt with enhanced logic
            prompt = self._generate_enhanced_prompt(user, stage, feedback)

            # Get LLM instructions (system-level rules, banned phrases, limits)
            instructions = self._get_llm_instructions(feedback)

            # Generate message
            response = self.llm.get_response(prompt=prompt, instructions=instructions)

            if response is None:
                return {
                    "success": False,
                    "data": None,
                    "error": "LLM returned no response"
                }

            # Validate response structure: must be a dict carrying both keys.
            if not isinstance(response, dict) or "header" not in response or "message" not in response:
                return {
                    "success": False,
                    "data": None,
                    "error": "Invalid response structure from LLM"
                }

            return {
                "success": True,
                "data": {
                    "header": response.get("header", ""),
                    "message": response.get("message", ""),
                    "prompt": prompt
                },
                "error": None,
                "metadata": {
                    "model": self.core_config.model,
                    "attempt": attempt
                }
            }

        except Exception as e:
            # Any unexpected failure is reported via the standard envelope so
            # the orchestrator can log it and retry.
            self.log_error(f"Error generating message: {str(e)}")
            return {
                "success": False,
                "data": None,
                "error": str(e)
            }

    def _generate_enhanced_prompt(self, user: pd.Series, stage: int, feedback: Optional[str] = None) -> str:
        """
        Generate enhanced prompt based on instructions vs examples logic.

        Args:
            user: User data
            stage: Campaign stage
            feedback: Feedback from SecurityAgent (if regenerating)

        Returns:
            Formatted prompt string
        """
        # Determine if we have instructions and/or examples.
        # NOTE(review): relies on core_config exposing campaign_instructions,
        # per_message_instructions and sample_example attributes.
        has_campaign_instructions = bool(self.core_config.campaign_instructions)
        has_message_instructions = bool(self.core_config.per_message_instructions)
        has_instructions = has_campaign_instructions or has_message_instructions
        has_examples = bool(self.core_config.sample_example)

        # Build prompt components: stage 1 is the initial outreach, later
        # stages are follow-ups that must differ from previous sends.
        if stage == 1:
            prompt = self._build_initial_message_prompt(user, has_instructions, has_examples, feedback)
        else:
            prompt = self._build_followup_message_prompt(user, has_instructions, has_examples, feedback)

        return prompt

    def _build_initial_message_prompt(self, user: pd.Series, has_instructions: bool,
                                      has_examples: bool, feedback: Optional[str]) -> str:
        """
        Build prompt for initial message (stage 1).

        Implements different strategies based on instructions vs examples:
        - Instructions + Examples: Prioritize instructions, use examples for style/voice
        - Only Examples: Use examples to understand vocabulary, style, voice, then personalize
        """
        # Start with input context
        input_context = self._get_input_context(has_instructions, has_examples, feedback)

        # Add examples if available
        example_section = self._get_example_section(has_instructions, has_examples)

        # Add user profile
        user_profile = self._get_user_profile(user)

        # Add conditional prompts (birthday, etc.)
        conditional_prompts = self._get_conditional_prompts(user)

        # Add instructions if available
        instructions_section = self._get_instructions_section(has_instructions, has_examples)

        # Add recommendation instructions
        recommendation_section = self._get_recommendation_instructions(user)

        # Add output instructions
        output_section = self._get_output_instructions()

        # Assemble the prompt
        prompt = f"""
{input_context}

{example_section}

{user_profile}

{conditional_prompts}

{instructions_section}

{recommendation_section}

{output_section}
"""

        return prompt.strip()

    def _build_followup_message_prompt(self, user: pd.Series, has_instructions: bool,
                                       has_examples: bool, feedback: Optional[str]) -> str:
        """
        Build prompt for follow-up message (stages 2-11).

        Includes previous message history to avoid repetition.
        """
        # Get previous messages
        # NOTE(review): assumes the "previous_messages" column was populated
        # upstream for follow-up stages — confirm against the data collector.
        previous_messages_str = str(user.get("previous_messages", ""))

        # Start with context about previous messages
        previous_context = f"""
We have previously sent these push notifications to the user and the user has not re-engaged yet:

**Previous Messages:**
{previous_messages_str}

**CRITICAL**: The new message must be different from previous messages. Avoid using similar words, phrases, and vocabulary from the previous sends. Check the last 2 messages especially carefully.
(Focus on structure/tone, not just words. "Ready to sing?" vs "Ready to practice?" = TOO SIMILAR)
"""

        # Build the rest similar to initial message
        input_context = self._get_input_context(has_instructions, has_examples, feedback)
        example_section = self._get_example_section(has_instructions, has_examples)
        user_profile = self._get_user_profile(user)
        conditional_prompts = self._get_conditional_prompts(user)
        instructions_section = self._get_instructions_section(has_instructions, has_examples)
        recommendation_section = self._get_recommendation_instructions(user)
        output_section = self._get_output_instructions()

        prompt = f"""
{previous_context}

{input_context}

{example_section}

{user_profile}

{conditional_prompts}

{instructions_section}

{recommendation_section}

{output_section}
"""

        return prompt.strip()

    def _get_input_context(self, has_instructions: bool, has_examples: bool,
                           feedback: Optional[str]) -> str:
        """Get the input context based on instructions/examples availability."""

        # Add feedback if this is a regeneration attempt
        feedback_section = ""
        if feedback:
            feedback_section = f"""
**IMPORTANT - Previous Attempt Feedback:**
{feedback}

Please address the feedback above in this new attempt.
"""

        if has_instructions:
            context = f"""
Your task is to generate a personalized 'header' and 'message' as a push notification for a student learning {self.core_config.get_instrument()}.

You should follow the instructions provided and use the examples (if available) to understand our brand's voice, style, and tone.

**Character Limits**: header < {self.core_config.config_file["header_limit"]} and message < {self.core_config.config_file["message_limit"]} characters.

{feedback_section}
"""
        elif has_examples:
            # Only examples, no instructions: the examples carry the brand voice.
            context = f"""
Your task is to generate ONE personalized 'header' and 'message' as a push notification for a student learning {self.core_config.get_instrument()}.

**IMPORTANT**: Carefully analyze the example messages provided below to understand our brand's:
- Vocabulary and word choices
- Writing style and tone for header and message
- Voice and personality
- Sentence structure and rhythm for header and message
- Character and feel

Stick to the vocabulary and style. Select one of the messages, Modify the example slightly to make it more personalized giving the instructions and provide one modified personalized 'header' and 'message' as the output.

**Character Limits**: header < {self.core_config.config_file["header_limit"]} and message < {self.core_config.config_file["message_limit"]} characters.

{feedback_section}
"""
        else:
            # No instructions, no examples (fallback)
            context = f"""
Your task is to generate a personalized 'header' and 'message' as a push notification for a student learning {self.core_config.get_instrument()}.

**Character Limits**: header < {self.core_config.config_file["header_limit"]} and message < {self.core_config.config_file["message_limit"]} characters.

{feedback_section}
"""

        return context.strip()

    def _get_example_section(self, has_instructions: bool, has_examples: bool) -> str:
        """Get the examples section with appropriate framing."""
        if not self.core_config.sample_example:
            return ""

        if has_instructions and has_examples:
            # Both: examples demonstrate style/voice
            header = "### **Example Messages (demonstrating our brand's voice and style):**"
        elif has_examples and not has_instructions:
            # Only examples: analyze these to understand brand
            header = "### **Example Messages:**\nStudy these examples, and stick to the vocabulary and style. Select one of them, Modify the example slightly to make it more personalized giving the instructions and provide one modified personalized 'header' and 'message' as the output."
        else:
            header = "### **Example Messages:**"

        return f"""
{header}

{self.core_config.sample_example}
"""

    def _get_user_profile(self, user: pd.Series) -> str:
        """Get user profile section."""
        user_info = f"""
### **User Information - Use to Personalize the Message:**

- The user is learning {self.core_config.get_instrument()}.
- {self.core_config.segment_info if self.core_config.segment_info else ""}
- User profile (use indirectly to improve personalization):
{user.get("user_info", "Not available")}
"""
        return user_info.strip()

    def _get_conditional_prompts(self, user: pd.Series) -> str:
        """
        Get conditional prompt injections (e.g., birthday reminders).

        Returns:
            Conditional prompt section or empty string
        """
        conditional_sections = []

        # Birthday reminder
        # pd.notna(None) is False, so a missing column is safely skipped.
        if pd.notna(user.get("birthday_reminder")) and user.get("birthday_reminder") not in [None, [], {}]:
            days_until = user.get("birthday_reminder")
            birthday_prompt = f"""
### **Special Note - Birthday Reminder:**
The user has a birthday coming up in {days_until} days. Include a brief, friendly birthday mention in the message if it fits naturally.
"""
            conditional_sections.append(birthday_prompt.strip())

        # Future conditional prompts can be added here
        # First name
        if pd.notna(user.get("first_name")) and user.get("first_name") not in [None, [], {}]:
            name = user.get("first_name")
            name_prompt = f"""
User name is: {name} ; if the name is a valid name (e.g. not email, none sense), use the name in the 'header'. If we have used their name in our previous messages, DON'T USE IT AGAIN.
"""
            conditional_sections.append(name_prompt.strip())
        # Example: achievement milestones, streaks, etc.

        return "\n\n".join(conditional_sections) if conditional_sections else ""

    def _get_instructions_section(self, has_instructions: bool, has_examples: bool) -> str:
        """Get instructions section."""
        if not has_instructions:
            return ""

        instructions_parts = []

        # Campaign-wide instructions
        if self.core_config.campaign_instructions:
            instructions_parts.append(f"""### **Campaign Instructions:**
{self.core_config.campaign_instructions}""")

        # Per-message instructions
        if self.core_config.per_message_instructions:
            instructions_parts.append(f"""### **Additional Instructions for This Message:**
{self.core_config.per_message_instructions}""")

        if has_examples:
            instructions_parts.append("\nUse the example messages to understand our brand's voice and style while following these instructions.")

        return "\n\n".join(instructions_parts)

    def _get_recommendation_instructions(self, user: pd.Series) -> str:
        """Get content recommendation instructions."""
        # Only include recommendations when the recsys integration is enabled.
        if not self.core_config.involve_recsys_result:
            return ""

        recommendation_info = user.get("recommendation_info", "")
        if not recommendation_info:
            return ""

        instructions = f"""
### **Content Recommendation Guidelines:**

Below is the content we want to recommend to the user:

{recommendation_info}

When incorporating this content:
1. **Title Usage**: Refer to the content naturally - paraphrase or describe it, don't quote verbatim. Avoid promotional tone.
2. **Content Type**: Mention the type (course, workout, etc.) only if it flows naturally.
3. **Artist/Instructor**: If the full name is available, mention it casually if appropriate. If only first name, do NOT include it. NEVER assume or hallucinate names.
4. **Tone**: Keep it light, supportive, engaging, and personal.
5. **Time Reference**: NEVER use time-related words ("new," "recent," "latest") or imply recency.

Make the recommendation feel personalized and casually relevant, not generic or marketing-like.
"""
        return instructions.strip()

    def _get_output_instructions(self) -> str:
        """Get output format instructions."""
        instructions = f"""
### **Output Format:**

Return a valid JSON object with this exact structure:

{{
"header": "your generated header",
"message": "your generated message"
}}

**Remember**:
- header < {self.core_config.config_file["header_limit"]} characters
- message < {self.core_config.config_file["message_limit"]} characters
- Valid JSON format
"""
        return instructions.strip()

    def _get_llm_instructions(self, feedback: Optional[str]) -> str:
        """
        Get system-level instructions for the LLM.

        Args:
            feedback: Feedback from previous rejection (if any)

        Returns:
            System instructions string
        """
        # Global banned words plus the brand-specific list keyed as
        # "AI_phrases_<brand>" in the system config file.
        banned_phrases = "\n".join(f"- {word}" for word in self.core_config.config_file.get("AI_Jargon", []))
        brand_jargon_key = f"AI_phrases_{self.core_config.brand}"
        jargon_list = "\n".join(f"- {word}" for word in self.core_config.config_file.get(brand_jargon_key, []))

        instructions = f"""
You are an expert at creating personalized, engaging push notifications for {self.core_config.get_instrument()} students.

**Critical Rules:**
- NEVER use time-related words ("new," "recent," "latest," etc.) or imply recency
- Stay within character limits: header < {self.core_config.config_file["header_limit"]}, message < {self.core_config.config_file["message_limit"]}
- Return valid JSON with "header" and "message" keys

**Avoid these phrases and similar variations:**
{banned_phrases}

**Also avoid these phrases:**
{jargon_list}

**Tone**: Supportive, engaging, personal - never pushy or promotional.
"""

        if feedback:
            instructions += f"""

**IMPORTANT**: Your previous attempt was rejected. Address this feedback:
{feedback}
"""

        return instructions.strip()
ai_messaging_system_v2/Messaging_system/agents/rejection_logger.py ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Rejection Logger
3
+
4
+ Logs all rejected messages to CSV files for evaluation and optimization purposes.
5
+ One CSV file is created per campaign run.
6
+ """
7
+
8
+ import csv
9
+ import os
10
+ from datetime import datetime
11
+ from pathlib import Path
12
+ from typing import Dict, Any, Optional
13
+ import logging
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class RejectionLogger:
    """
    Handles logging of rejected messages to CSV files.

    Creates one CSV file per campaign run containing all rejections
    with detailed information for LLM evaluation and process optimization.
    """

    def __init__(self, campaign_name: str, brand: str, stage: int, base_dir: Optional[str] = None):
        """
        Initialize the rejection logger.

        Args:
            campaign_name: Name of the campaign
            brand: Brand name (drumeo, pianote, etc.)
            stage: Campaign stage number
            base_dir: Base directory for logs (defaults to
                ai_messaging_system_v2/logs/rejections/)
        """
        self.campaign_name = campaign_name
        self.brand = brand
        self.stage = stage

        # Resolve the log directory; the default lives under the project root
        # (three levels up from this file: agents/ -> Messaging_system/ -> project).
        if base_dir is None:
            project_root = Path(__file__).parent.parent.parent
            base_dir = project_root / "logs" / "rejections"
        else:
            base_dir = Path(base_dir)

        # Create directory if it doesn't exist
        base_dir.mkdir(parents=True, exist_ok=True)

        # Timestamped filename so repeated runs of the same campaign/stage
        # never collide or append to each other's files.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"{brand}_{campaign_name}_stage{stage}_{timestamp}_rejections.csv"
        self.log_file = base_dir / filename

        # Fixed CSV schema. Keys supplied via additional_data that are not
        # listed here are silently dropped (extrasaction='ignore' on write).
        self.headers = [
            "timestamp",
            "user_id",
            "attempt_number",
            "rejection_reason",
            "validation_type",  # rule_based or llm_based
            "detailed_feedback",
            "generated_header",
            "generated_message",
            "header_length",
            "message_length",
            "model_used",
            "prompt_excerpt",  # First 200 chars of prompt for reference
            "recommendation_info",
            "has_instructions",
            "has_examples"
        ]

        # Initialize the CSV file with headers
        self._initialize_csv()

        logger.info(f"Rejection logger initialized: {self.log_file}")

    def _initialize_csv(self):
        """Create the CSV file and write the header row."""
        with open(self.log_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=self.headers)
            writer.writeheader()

    def log_rejection(self,
                      user_id: str,
                      attempt_number: int,
                      rejection_reason: str,
                      validation_type: str,
                      detailed_feedback: str,
                      generated_header: str,
                      generated_message: str,
                      model_used: str,
                      prompt: str = "",
                      recommendation_info: str = "",
                      has_instructions: bool = False,
                      has_examples: bool = False,
                      additional_data: Optional[Dict[str, Any]] = None):
        """
        Log a rejected message to the CSV file.

        Errors are logged and swallowed deliberately: a logging failure must
        never abort the message-generation pipeline.

        Args:
            user_id: ID of the user
            attempt_number: Which attempt this was (1, 2, or 3)
            rejection_reason: Brief reason for rejection
            validation_type: "rule_based" or "llm_based"
            detailed_feedback: Detailed feedback to help improve next attempt
            generated_header: The header that was rejected
            generated_message: The message that was rejected
            model_used: LLM model that generated the message
            prompt: The prompt used (optional, will be truncated to 200 chars)
            recommendation_info: Content recommendation info (optional)
            has_instructions: Whether instructions were provided
            has_examples: Whether examples were provided
            additional_data: Any additional data to log (optional; keys not in
                the CSV schema are ignored)
        """
        try:
            row_data = {
                "timestamp": datetime.now().isoformat(),
                "user_id": user_id,
                "attempt_number": attempt_number,
                "rejection_reason": rejection_reason,
                "validation_type": validation_type,
                "detailed_feedback": detailed_feedback,
                "generated_header": generated_header,
                "generated_message": generated_message,
                "header_length": len(generated_header) if generated_header else 0,
                "message_length": len(generated_message) if generated_message else 0,
                "model_used": model_used,
                "prompt_excerpt": prompt[:200] if prompt else "",
                "recommendation_info": recommendation_info,
                "has_instructions": has_instructions,
                "has_examples": has_examples
            }

            # Merge in any additional data (unknown keys dropped on write).
            if additional_data:
                row_data.update(additional_data)

            # Append a single record; open/close per call keeps the file
            # consistent even if the process dies mid-run.
            with open(self.log_file, 'a', newline='', encoding='utf-8') as f:
                writer = csv.DictWriter(f, fieldnames=self.headers, extrasaction='ignore')
                writer.writerow(row_data)

            logger.debug(f"Logged rejection for user {user_id}, attempt {attempt_number}")

        except Exception as e:
            logger.error(f"Error logging rejection: {str(e)}")

    def get_log_path(self) -> Path:
        """Return the path to the log file."""
        return self.log_file

    def get_rejection_count(self) -> int:
        """
        Get the total number of rejections logged.

        Counts CSV records rather than raw file lines: quoted fields such as
        detailed_feedback or generated_message can contain embedded newlines,
        which would inflate a naive line count.

        Returns:
            Number of rejections (excluding header row), or 0 on error.
        """
        try:
            with open(self.log_file, 'r', newline='', encoding='utf-8') as f:
                reader = csv.reader(f)
                next(reader, None)  # skip header row
                return sum(1 for _ in reader)
        except Exception as e:
            logger.error(f"Error counting rejections: {str(e)}")
            return 0

    def get_rejection_stats(self) -> Dict[str, Any]:
        """
        Get statistics about rejections.

        Returns:
            Dictionary with total counts, counts split by validation type,
            counts per attempt number (1-3), and a frequency map of rejection
            reasons. Empty dict on error.
        """
        try:
            stats = {
                "total_rejections": 0,
                "rule_based_rejections": 0,
                "llm_based_rejections": 0,
                "by_attempt": {1: 0, 2: 0, 3: 0},
                "common_reasons": {}
            }

            with open(self.log_file, 'r', newline='', encoding='utf-8') as f:
                reader = csv.DictReader(f)
                for row in reader:
                    stats["total_rejections"] += 1

                    # Count by validation type
                    if row["validation_type"] == "rule_based":
                        stats["rule_based_rejections"] += 1
                    elif row["validation_type"] == "llm_based":
                        stats["llm_based_rejections"] += 1

                    # Count by attempt; skip malformed values instead of
                    # aborting the whole stats pass.
                    try:
                        attempt = int(row["attempt_number"])
                    except (TypeError, ValueError):
                        attempt = None
                    if attempt in stats["by_attempt"]:
                        stats["by_attempt"][attempt] += 1

                    # Count common reasons
                    reason = row["rejection_reason"]
                    stats["common_reasons"][reason] = stats["common_reasons"].get(reason, 0) + 1

            return stats

        except Exception as e:
            logger.error(f"Error getting rejection stats: {str(e)}")
            return {}
ai_messaging_system_v2/Messaging_system/agents/security_agent.py ADDED
@@ -0,0 +1,459 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Security Agent
3
+
4
+ Validates generated messages using rule-based and LLM-based checks.
5
+ Acts as a firewall to ensure messages meet quality standards and expectations.
6
+ """
7
+
8
+ import json
9
+ import re
10
+ from typing import Dict, Any, Optional, List, Tuple
11
+ from .base_agent import BaseAgent
12
+ from ..LLM import LLM
13
+
14
+
15
class SecurityAgent(BaseAgent):
    """
    Security Agent - Validates messages with rule-based and LLM-based checks.

    This agent:
    - Performs fast rule-based validation first
    - Runs LLM-based validation for instruction adherence, content accuracy, and authenticity
    - Provides detailed feedback for regeneration
    - Can approve or reject messages
    """

    def __init__(self, core_config: Any):
        """
        Initialize the Security Agent.

        Args:
            core_config: CoreConfig instance
        """
        super().__init__(name="SecurityAgent", core_config=core_config)
        self.llm = LLM(core_config)

    def execute(self, context: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute message validation.

        Args:
            context: {
                "header": str,
                "message": str,
                "user": pd.Series,
                "prompt": str,
                "attempt": int
            }

        Returns:
            {
                "success": bool,  # True = approved, False = rejected
                "data": {
                    "approved": bool,
                    "rejection_reason": Optional[str],
                    "detailed_feedback": Optional[str],
                    "validation_type": str  # "rule_based" or "llm_based"
                },
                "error": Optional[str]
            }
        """
        # Validate required context
        if not self.validate_context(context, ["header", "message", "user"]):
            return {
                "success": False,
                "data": {
                    "approved": False,
                    "rejection_reason": "Missing context",
                    "detailed_feedback": "Required validation context missing",
                    "validation_type": "rule_based"
                },
                "error": "Missing required context keys"
            }

        header = context["header"]
        message = context["message"]
        user = context["user"]
        prompt = context.get("prompt", "")
        attempt = context.get("attempt", 1)

        try:
            # Step 1: Rule-based validation (fast, no API cost)
            rule_result = self._rule_based_validation(header, message)
            if not rule_result["passed"]:
                self.log_info(f"Rule-based validation failed: {rule_result['reason']}")
                return {
                    "success": False,
                    "data": {
                        "approved": False,
                        "rejection_reason": rule_result["reason"],
                        "detailed_feedback": rule_result["feedback"],
                        "validation_type": "rule_based"
                    },
                    "error": None
                }

            # skipping LLM-based validation for now - UI purposes --> be faster
            # Step 2: LLM-based validation (includes similarity check for follow-up messages)
            # llm_result = self._llm_based_validation(header, message, user, prompt)
            # if not llm_result["passed"]:
            #     self.log_info(f"LLM-based validation failed: {llm_result['reason']}")
            #     return {
            #         "success": False,
            #         "data": {
            #             "approved": False,
            #             "rejection_reason": llm_result["reason"],
            #             "detailed_feedback": llm_result["feedback"],
            #             "validation_type": "llm_based"
            #         },
            #         "error": None
            #     }

            # All validations passed
            self.log_info("Message approved")
            return {
                "success": True,
                "data": {
                    "approved": True,
                    "rejection_reason": None,
                    "detailed_feedback": None,
                    "validation_type": "approved"
                },
                "error": None
            }

        except Exception as e:
            self.log_error(f"Error during validation: {str(e)}")
            return {
                "success": False,
                "data": {
                    "approved": False,
                    "rejection_reason": "Validation error",
                    "detailed_feedback": str(e),
                    "validation_type": "error"
                },
                "error": str(e)
            }

    def _rule_based_validation(self, header: str, message: str) -> Dict[str, Any]:
        """
        Perform fast rule-based validation.

        Checks:
        - Character limits
        - Empty content
        - Banned phrases
        - Singeo-specific instrument phrasing
        - Unreplaced template placeholders

        Returns:
            {
                "passed": bool,
                "reason": Optional[str],
                "feedback": Optional[str]
            }
        """
        # Check for empty or None
        if not header or not message:
            return {
                "passed": False,
                "reason": "Empty header or message",
                "feedback": "Both header and message must have content. Please generate non-empty text."
            }

        # Strip whitespace before measuring lengths so trailing spaces don't
        # trip the limits.
        header = header.strip()
        message = message.strip()

        # Check character limits
        header_limit = self.core_config.config_file.get("header_limit", 30)
        message_limit = self.core_config.config_file.get("message_limit", 110)

        header_len = len(header)
        message_len = len(message)

        if header_len > header_limit:
            chars_over = header_len - header_limit
            return {
                "passed": False,
                "reason": f"Header exceeds limit ({header_len}/{header_limit} chars)",
                "feedback": f"Header is {chars_over} characters too long. Shorten it to {header_limit} characters while keeping the key message."
            }

        if message_len > message_limit:
            chars_over = message_len - message_limit
            return {
                "passed": False,
                "reason": f"Message exceeds limit ({message_len}/{message_limit} chars)",
                "feedback": f"Message is {chars_over} characters too long. Reduce to {message_limit} characters by removing unnecessary words while preserving personalization and content recommendation."
            }

        # Check for banned AI jargon
        banned_check = self._check_banned_phrases(header, message)
        if not banned_check["passed"]:
            return banned_check

        # Check for Singeo-specific 'your instrument' phrase
        if self.core_config.brand == "singeo":
            instrument_check = self._check_instrument_phrases_singeo(header, message)
            if not instrument_check["passed"]:
                return instrument_check

        # Check for placeholder strings (template variables)
        placeholder_check = self._check_placeholder_strings(header, message)
        if not placeholder_check["passed"]:
            return placeholder_check

        # All rule-based checks passed
        return {
            "passed": True,
            "reason": None,
            "feedback": None
        }

    def _check_banned_phrases(self, header: str, message: str) -> Dict[str, Any]:
        """Check for banned AI jargon and brand-specific phrases (case-insensitive substring match)."""
        combined_text = (header + " " + message).lower()

        # Check AI jargon
        ai_jargon = self.core_config.config_file.get("AI_Jargon", [])
        for phrase in ai_jargon:
            if phrase.lower() in combined_text:
                return {
                    "passed": False,
                    "reason": f"Contains banned phrase: '{phrase}'",
                    "feedback": f"Remove the phrase '{phrase}' and replace it with more natural, conversational language."
                }

        # Check brand-specific banned phrases
        brand_key = f"AI_phrases_{self.core_config.brand}"
        brand_phrases = self.core_config.config_file.get(brand_key, [])
        for phrase in brand_phrases:
            if phrase.lower() in combined_text:
                return {
                    "passed": False,
                    "reason": f"Contains brand-banned phrase: '{phrase}'",
                    "feedback": f"Remove '{phrase}' and use more authentic, brand-appropriate language."
                }

        return {"passed": True, "reason": None, "feedback": None}

    def _check_instrument_phrases_singeo(self, header: str, message: str) -> Dict[str, Any]:
        """
        Check for 'your instrument' and related phrases specifically for Singeo brand.
        This is an extra safety check beyond the banned phrases list.
        """
        instrument_phrases = [
            "your instrument", "the instrument",
            "practice your instrument", "your singing instrument"
        ]

        combined_text = (header + " " + message).lower()

        for phrase in instrument_phrases:
            if phrase.lower() in combined_text:
                return {
                    "passed": False,
                    "reason": f"Contains inappropriate phrase for Singeo: '{phrase}'",
                    "feedback": f"For Singeo, avoid using '{phrase}'. Singing/vocals are not instruments. Use more natural language like 'practice' or 'continue learning' without referencing instruments."
                }

        return {"passed": True, "reason": None, "feedback": None}

    def _check_placeholder_strings(self, header: str, message: str) -> Dict[str, Any]:
        """
        Check for placeholder/template strings that should have been replaced.
        Common patterns: [user_name], {user_name}, {{variable}}, <content_title>, etc.
        """
        combined_text = header + " " + message

        # Pattern to match common placeholder formats. The double-brace
        # pattern is listed BEFORE the single-brace one so that "{{var}}"
        # is reported as a whole instead of as its inner "{var}".
        placeholder_patterns = [
            r'\[[\w_]+\]',      # [user_name], [first_name], [content_title]
            r'\{\{[\w_]+\}\}',  # {{user_name}}, {{variable}}
            r'\{[\w_]+\}',      # {user_name}, {name}
            r'<[\w_]+>',        # <user_name>, <placeholder>
        ]

        for pattern in placeholder_patterns:
            matches = re.findall(pattern, combined_text)
            if matches:
                # Get unique placeholders; show at most three in the feedback.
                unique_placeholders = list(set(matches))
                placeholder_str = ", ".join(f"'{p}'" for p in unique_placeholders[:3])

                return {
                    "passed": False,
                    "reason": f"Contains unreplaced placeholder(s): {placeholder_str}",
                    "feedback": f"Do not use template placeholders like {placeholder_str}. Generate actual content with real names and values. If you don't have specific information, create natural generic text without placeholders."
                }

        return {"passed": True, "reason": None, "feedback": None}

    def _get_last_n_previous_messages(self, user: Any, n: int = 2) -> Optional[str]:
        """
        Extract the last N previous messages from user data.

        Args:
            user: User data with previous_messages field
            n: Number of most recent messages to extract (default: 2)

        Returns:
            Formatted string of last N messages, or None if no previous messages
        """
        previous_messages = user.get("previous_messages", "")

        # If no previous messages or empty, return None
        if not previous_messages or previous_messages in [None, "", "[]", {}]:
            return None

        # Convert to string
        previous_messages_str = str(previous_messages)

        # Try to parse if it's a list/structured format. Only JSON parse
        # failures are caught here — anything else should surface to the
        # caller instead of being silently swallowed.
        try:
            if previous_messages_str.strip().startswith('['):
                parsed_messages = json.loads(previous_messages_str)
                if isinstance(parsed_messages, list) and len(parsed_messages) > 0:
                    # Get last N messages
                    last_n_messages = parsed_messages[-n:] if len(parsed_messages) >= n else parsed_messages
                    return json.dumps(last_n_messages, indent=2)
        except (json.JSONDecodeError, TypeError):
            # If parsing fails, fall back to the raw string (truncated below)
            if len(previous_messages_str) > 500:
                return previous_messages_str[-500:]  # Last 500 chars

        return previous_messages_str

    def _llm_based_validation(self, header: str, message: str,
                              user: Any, prompt: str) -> Dict[str, Any]:
        """
        Perform LLM-based validation for instruction adherence, content accuracy, and authenticity.

        Fails open: if the LLM call errors or returns nothing, the message is
        approved, since rule-based validation has already passed.

        Args:
            header: Generated header
            message: Generated message
            user: User data
            prompt: Original prompt used for generation

        Returns:
            {
                "passed": bool,
                "reason": Optional[str],
                "feedback": Optional[str]
            }
        """
        # Build validation prompt
        validation_prompt = self._build_validation_prompt(header, message, user, prompt)

        # Get validation instructions
        validation_instructions = """
You are a quality assurance expert for push notifications. Your task is to validate whether the generated message meets all requirements.

Analyze the message and return a JSON response with your assessment.

Return format:
{
"approved": true/false,
"issues": ["list of any issues found"],
"feedback": "detailed feedback for improvement (if not approved)"
}
"""

        try:
            # Get LLM validation with validation_response mode
            response = self.llm.get_response(
                prompt=validation_prompt,
                instructions=validation_instructions,
                validation_mode="validation_response"  # Use validation response mode
            )

            if response is None:
                # If LLM fails, default to approval (rule-based already passed)
                self.log_warning("LLM validation failed to return response, defaulting to approval")
                return {"passed": True, "reason": None, "feedback": None}

            # Parse response
            approved = response.get("approved", True)
            issues = response.get("issues", [])
            feedback = response.get("feedback", "")

            if not approved:
                # Combine issues into reason
                reason = "; ".join(issues) if issues else "LLM quality check failed"
                return {
                    "passed": False,
                    "reason": reason,
                    "feedback": feedback if feedback else "Please improve the message quality and adherence to guidelines."
                }

            return {"passed": True, "reason": None, "feedback": None}

        except Exception as e:
            self.log_warning(f"LLM validation error: {str(e)}, defaulting to approval")
            # Default to approval if LLM validation fails
            return {"passed": True, "reason": None, "feedback": None}

    def _build_validation_prompt(self, header: str, message: str,
                                 user: Any, original_prompt: str) -> str:
        """
        Build the validation prompt for LLM-based checking.

        Args:
            header: Generated header
            message: Generated message
            user: User data
            original_prompt: The prompt used to generate the message

        Returns:
            Validation prompt string
        """
        # Extract key context from original prompt
        has_recommendation = bool(user.get("recommendation_info"))

        recommendation_text = ""
        if has_recommendation:
            # Truncate to 300 chars — enough for the validator to verify names.
            recommendation_text = f"""
**Content Recommendation:**
{user.get("recommendation_info", "")[:300]}...
"""

        # Check for previous messages for similarity validation
        last_2_messages = self._get_last_n_previous_messages(user, n=2)
        similarity_check = ""
        if last_2_messages:
            similarity_check = f"""
**Similarity**: Does it sound too similar to previous messages? (Focus on structure/tone, not just words. "Ready to sing?" vs "Ready to practice?" = TOO SIMILAR)
Previous messages: {last_2_messages}"""

        prompt = f"""Validate this push notification:

**Header:** {header}
**Message:** {message}

**Be LENIENT - only reject OBVIOUS problems.**

**Check:**
- If the name of the artist/instructor is used in the message, is it correct (based on Content Recommendation)?
recommendation_text: {recommendation_text}


- **Time Words**: Does it imply CONTENT is new/recent?
- FORBIDDEN: "new course", "recent release", "latest content" (content recency)
- OK: "practice today", "start today" (action timing)
Only reject if implies content recency, NOT action timing.

- {similarity_check}

**REJECT if:**
- incorrect name of the artist/instructor (if applicable)
- Implies content is new/recent
- Sounds too similar to previous (if applicable)

**Otherwise APPROVE**

Feedback (if rejected): 1-2 sentences on how to fix.
"""

        return prompt.strip()
ai_messaging_system_v2/README.md ADDED
@@ -0,0 +1,489 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Intelligent Music Education Messaging Platform
2
+
3
+ An AI-powered messaging platform that generates personalized push notifications for music education platforms based on user engagement patterns, behavior analysis, and content preferences. Built specifically for Musora's music education ecosystem (Singeo, Pianote, Guitareo, Drumeo).
4
+
5
+ ## 🎯 Overview
6
+
7
+ This project automatically generates contextually relevant, personalized push notification messages for users eligible for specific campaign stages. The system analyzes user behavior, engagement patterns, learning preferences, and interaction history to create tailored re-engagement campaigns that drive meaningful platform interaction.
8
+ High-level non-technical documentation: https://docs.google.com/document/d/1ifhSrwhU-RN9YpSW84bfHYTs5MyfeLJ5ONgmBDuZ5_I/edit?tab=t.0
9
+
10
+
11
+ ## 🏗️ System Architecture
12
+
13
+ ### Core Pipeline Flow
14
+
15
+ 1. **Input Configuration** - Configure message parameters and test mode settings
16
+ 2. **User Eligibility** - Fetch eligible users from Snowflake or test CSV files
17
+ 3. **Data Collection** - Gather user profiles, interaction history, and content data
18
+ 4. **Content Recommendation** - Select personalized content recommendations using AI or default routing
19
+ 5. **Prompt Generation** - Create context-aware prompts incorporating user data and recommendations
20
+ 6. **Message Generation** - Generate personalized messages using multiple LLM providers
21
+ 7. **Post-Processing** - Structure output and validate message format
22
+ 8. **Storage & Analytics** - Store results and cost analysis to Snowflake (production mode)
23
+
24
+ ## 📁 Project Structure
25
+
26
+ ```
27
+ ai_messaging_system_v2/
28
+ ├── generate_message_parallel.py # Main pipeline orchestrator with parallel processing
29
+ ├── configs/ # Modular configuration system
30
+ │ ├── config_loader.py # Main config loader with helper functions
31
+ │ ├── system/
32
+ │ │ └── system_config.py # System-wide settings
33
+ │ ├── singeo/
34
+ │ │ └── campaigns.py # Singeo brand campaigns
35
+ │ ├── drumeo/
36
+ │ │ └── campaigns.py # Drumeo brand campaigns
37
+ │ ├── guitareo/
38
+ │ │ └── campaigns.py # Guitareo brand campaigns
39
+ │ ├── pianote/
40
+ │ │ └── campaigns.py # Pianote brand campaigns
41
+ │ ├── test_data/
42
+ │ │ └── test_config.py # Test campaign data
43
+ │ └── README.md # Configuration documentation
44
+ ├── Messaging_system/ # Core messaging modules
45
+ │ ├── Permes.py # Main orchestration class
46
+ │ ├── CoreConfig.py # Configuration and state management
47
+ │ ├── DataCollector.py # Data fetching and user profile creation
48
+ │ ├── SnowFlakeConnection.py # Database operations and queries
49
+ │ ├── PromptGenerator.py # AI prompt creation and personalization
50
+ │ ├── Message_generator.py # LLM-based message generation
51
+ │ ├── LLM.py # Multi-provider LLM interface (OpenAI, Google)
52
+ │ ├── LLMR.py # AI-powered content recommender
53
+ │ └── Homepage_Recommender.py # Default recommendation fallback
54
+ └── Data/ # Test data
55
+ ├── test_camp.json # Test campaign configuration
56
+ └── test_staff.csv # Test user data
57
+ ```
58
+
59
+ ## 🔧 Core Components
60
+
61
+ ### 1. Permes (Main Orchestrator)
62
+ The central class that coordinates the entire message generation pipeline:
63
+ - **Purpose**: Orchestrates data collection, recommendation, prompt generation, and message creation
64
+ - **Key Methods**:
65
+ - `create_personalize_messages()`: Main entry point for message generation
66
+ - `_create_personalized_message()`: Handles first-time user messages
67
+ - `_create_followup_personalized_message()`: Manages follow-up campaign messages
68
+ - **Features**: Cost calculation, token tracking, Snowflake integration
69
+
70
+ ### 2. CoreConfig (Configuration Manager)
71
+ Manages system configuration and state throughout the pipeline:
72
+ - **Purpose**: Centralized configuration and state management
73
+ - **Key Features**:
74
+ - LLM model configuration (OpenAI, Google)
75
+ - Brand-specific settings (Drumeo, Pianote, Guitareo, Singeo)
76
+ - Token usage tracking and rate limiting
77
+ - Personalization settings
78
+
79
+ ### 3. DataCollector (Data Aggregation)
80
+ Handles user data collection and profile creation:
81
+ - **Purpose**: Fetch and prepare user data from multiple sources
82
+ - **Key Features**:
83
+ - User ID extraction and validation
84
+ - Multi-source data merging (users, interactions, recommendations)
85
+ - Birthday reminder calculation
86
+ - Historical message data for follow-ups
87
+
88
+ ### 4. SnowFlakeConnection (Database Interface)
89
+ Manages all database operations and queries:
90
+ - **Purpose**: Interface with Snowflake data warehouse
91
+ - **Key Features**:
92
+ - User eligibility queries with stage progression logic
93
+ - Content and interaction data retrieval
94
+ - Message storage and cost tracking
95
+ - Campaign management queries
96
+
97
+ ### 5. PromptGenerator (AI Prompt Creation)
98
+ Creates personalized prompts for LLM message generation:
99
+ - **Purpose**: Generate context-aware prompts incorporating user data
100
+ - **Key Features**:
101
+ - User profile integration
102
+ - Content recommendation instructions
103
+ - Campaign-wide instruction injection
104
+ - Per-message (stage-specific) instruction injection
105
+ - Follow-up message context
106
+ - Brand voice integration
107
+
108
+ ### 6. Message_generator (LLM Interface)
109
+ Handles LLM communication and message generation:
110
+ - **Purpose**: Generate personalized messages using AI models
111
+ - **Key Features**:
112
+ - Multi-provider LLM support
113
+ - JSON response parsing and validation
114
+ - Output structure management
115
+ - Character limit enforcement
116
+
117
+ ### 7. LLM (Multi-Provider Support)
118
+ Provides unified interface to multiple LLM providers:
119
+ - **Purpose**: Abstract LLM provider differences
120
+ - **Supported Providers**: OpenAI (GPT models), Google (Gemini models)
121
+ - **Key Features**:
122
+ - Automatic fallback between providers
123
+ - Retry logic and error handling
124
+ - Token usage tracking
125
+ - Response validation
126
+
127
+ ### 8. LLMR (AI Content Recommender)
128
+ AI-powered content recommendation system:
129
+ - **Purpose**: Select optimal content for user recommendations
130
+ - **Features**:
131
+ - User profile-based AI recommendations
132
+ - Specific content mode (force same content for all users)
133
+ - Random selection from top choices
134
+ - Content filtering and validation
135
+ - Integration with recommendation systems
136
+
137
+ ### 9. Homepage_Recommender (Default Fallback)
138
+ Provides default recommendation when AI selection fails:
139
+ - **Purpose**: Ensure users always receive a recommendation
140
+ - **Feature**: Routes users to personalized "For You" section
141
+
142
+ ## 🚀 Getting Started
143
+
144
+ ### Prerequisites
145
+
146
+ - Python 3.8+
147
+ - Snowflake account and credentials
148
+ - OpenAI API key
149
+ - Google AI API key (optional, for Gemini models)
150
+
151
+ ### Required Dependencies
152
+
153
+ ```bash
154
+ pip install pandas snowflake-snowpark-python openai google-genai python-dotenv tqdm
155
+ ```
156
+
157
+ ### Environment Variables
158
+
159
+ Create a `.env` file with the following variables:
160
+
161
+ ```env
162
+ # Snowflake Configuration
163
+ SNOWFLAKE_USER=your_snowflake_user
164
+ SNOWFLAKE_PASSWORD=your_password
165
+ SNOWFLAKE_ACCOUNT=your_account
166
+ SNOWFLAKE_ROLE=your_role
167
+ SNOWFLAKE_DATABASE=your_database
168
+ SNOWFLAKE_WAREHOUSE=your_warehouse
169
+ SNOWFLAKE_SCHEMA=your_schema
170
+
171
+ # AI Provider Keys
172
+ OPENAI_API_KEY=your_openai_key
173
+ GOOGLE_API_KEY=your_google_key
174
+ ```
175
+
176
+ ### Basic Usage
177
+
178
+ 1. **Configure System**: Edit configuration files in `configs/` directory:
179
+ - `configs/system/system_config.py` - System-wide settings
180
+ - `configs/{brand}/campaigns.py` - Brand-specific campaigns
181
+ - See `configs/README.md` for detailed configuration guide
182
+
183
+ 2. **Set Parameters**: Modify `generate_message_parallel.py` main section:
184
+
185
+ ```python
186
+ if __name__ == "__main__":
187
+ # Input parameters
188
+ message_numbers = range(1, 2) # Stage number(s) to process
189
+ test_mode = False # If True, uses test campaign config
190
+ run_for_all_messages = False # If True, runs for all 11 stages
191
+ brand = "drumeo" # Brand: "singeo", "guitareo", "pianote", "drumeo"
192
+ campaign_type = "re_engagement" # Campaign type
193
+
194
+ # Parallel processing parameters
195
+ chunk_size = 1000 # Maximum chunk size (actual size determined dynamically)
196
+ max_workers = 5 # Number of parallel workers (None = auto-detect)
197
+
198
+ # Generate messages
199
+ for message_number in message_numbers:
200
+ results = generate_messages_parallel(
201
+ message_number=message_number,
202
+ test_mode=test_mode,
203
+ run_for_all_messages=run_for_all_messages,
204
+ brand=brand,
205
+ campaign_type=campaign_type,
206
+ chunk_size=chunk_size,
207
+ max_workers=max_workers
208
+ )
209
+ ```
210
+
211
+ 3. **Run Pipeline**:
212
+ ```bash
213
+ python generate_message_parallel.py
214
+ ```
215
+
216
+ ### Test Mode
217
+
218
+ Enable test mode to use embedded test data instead of Snowflake:
219
+
220
+ ```python
221
+ test_mode = True # Uses TEST_STAFF_DATA from configs/test_data/test_config.py
222
+ ```
223
+
224
+ ## 🎵 Brand Support
225
+
226
+ The platform supports all Musora education brands:
227
+
228
+ | Brand | Instrument | Emoji | Base URL |
229
+ |----------|------------|-------|----------|
230
+ | Drumeo | Drums | 🥁 | drumeo |
231
+ | Pianote | Piano | 🎹 | pianote |
232
+ | Guitareo | Guitar | 🎸 | guitareo |
233
+ | Singeo | Singing | 🎤 | singeo |
234
+
235
+ ## 🤖 AI Model Support
236
+
237
+ ### OpenAI Models
238
+ - GPT-4o-mini
239
+ - GPT-4.1-mini
240
+ - GPT-5-mini
241
+ - GPT-5-nano (default fallback)
242
+
243
+ ### Google Models
244
+ - Gemini-2.5-flash
245
+ - Gemini-2.0-flash
246
+ - Gemini-2.5-flash-lite
247
+
248
+ ### Model Selection Strategy
249
+ - Primary model attempts with retries
250
+ - Automatic fallback to alternative models
251
+ - Cost optimization through model pricing tiers
252
+ - Failure threshold-based model switching
253
+
254
+ ## 📊 Features
255
+
256
+ ### Personalization
257
+ - User profile integration
258
+ - Learning history analysis
259
+ - Engagement pattern recognition
260
+ - Content preference matching
261
+ - Birthday reminders
262
+ - Campaign-wide instructions for consistent messaging
263
+ - Stage-specific instructions for targeted guidance
264
+
265
+ ### Multi-Stage Campaigns
266
+ - 11-stage re-engagement sequences
267
+ - Previous message context awareness
268
+ - Stage progression logic
269
+ - Cooldown period management
270
+
271
+ ### Content Recommendations
272
+ - AI-powered content selection
273
+ - Specific content promotion (force same content for all users)
274
+ - Popular content fallback
275
+ - Brand-appropriate filtering
276
+ - Content type diversity (courses, workouts, songs, quick tips)
277
+
278
+ ### Quality Assurance
279
+ - Character limit enforcement
280
+ - JSON validation
281
+ - Content filtering (banned content)
282
+ - Message uniqueness verification
283
+
284
+ ### Analytics & Monitoring
285
+ - Token usage tracking
286
+ - Cost calculation per campaign
287
+ - Performance metrics
288
+ - Error logging and handling
289
+
290
+ ## 🔄 Campaign Flow
291
+
292
+ 1. **Stage 1**: Initial engagement message with personalized content
293
+ 2. **Stages 2-11**: Follow-up messages with:
294
+ - Context from previous messages
295
+ - Fresh content recommendations
296
+ - Varied messaging approaches
297
+ - Escalating engagement tactics
298
+
299
+ ## 💰 Cost Management
300
+
301
+ - Real-time token usage tracking
302
+ - Per-message cost calculation
303
+ - Model pricing optimization
304
+ - Rate limiting and throttling
305
+ - Batch processing efficiency
306
+
307
+ ## 🛡️ Error Handling
308
+
309
+ - Multi-level retry logic
310
+ - Graceful fallback mechanisms
311
+ - Comprehensive logging
312
+ - Data validation at each stage
313
+ - User-friendly error messages
314
+
315
+ ## 📈 Performance Optimization
316
+
317
+ ### Parallel Processing Architecture
318
+ - **Multiprocessing**: Uses Python's multiprocessing.Pool for true parallel execution
319
+ - **Dynamic Chunking**: Intelligently distributes users among workers for maximum utilization
320
+ - Minimum chunk size: 20 users
321
+ - Maximum chunk size: 1000 users
322
+ - Equal distribution across available workers
323
+ - Example: 1000 users with 5 workers → 5 chunks of 200 (uses all workers)
324
+ - **Independent Workers**: Each worker process creates its own Snowflake session for thread safety
325
+ - **Concurrent Chunk Processing**: Processes multiple chunks simultaneously using CPU cores
326
+ - **Scalable Workers**: Auto-detects CPU count or accepts custom worker count (capped at 8 by default)
327
+
328
+ ### Additional Optimizations
329
+ - Efficient database queries with indexed lookups
330
+ - Direct Snowflake writes within each worker process
331
+ - Memory-efficient chunk processing
332
+ - Real-time progress tracking and logging
333
+ - Automatic result aggregation and summary statistics
334
+
335
+ ## 🧪 Testing
336
+
337
+ The platform includes comprehensive testing capabilities:
338
+
339
+ - Test mode with embedded test data (local CSV files instead of Snowflake)
340
+ - Mock data generation
341
+ - Campaign simulation
342
+ - Performance benchmarking
343
+ - Cost estimation
344
+
345
+ ## 📝 Configuration
346
+
347
+ The system uses a modular configuration structure organized by purpose and brand. All configurations are located in the `configs/` directory.
348
+
349
+ ### System Configuration (`configs/system/system_config.py`)
350
+
351
+ Contains global system settings:
352
+
353
+ ```python
354
+ SYSTEM_CONFIG = {
355
+ "user_info_features": ["first_name", "country", "instrument", ...],
356
+ "header_limit": 30,
357
+ "message_limit": 110,
358
+ "openai_models": ["gpt-4o-mini", "gpt-5-nano", ...],
359
+ "google_models": ["gemini-2.5-flash-lite", "gemini-2.5-flash", ...],
360
+ "model_failure_threshold": 3,
361
+ "banned_contents": [373883, 358813, ...],
362
+ "AI_Jargon": ["elevate", "enhance", "ignite", ...]
363
+ }
364
+ ```
365
+
366
+ ### Campaign Configuration (`configs/{brand}/campaigns.py`)
367
+
368
+ Each brand has its own configuration file with campaign-specific settings:
369
+
370
+ ```python
371
+ CAMPAIGNS = {
372
+ "re_engagement": {
373
+ "campaign_view": "drumeo_re_engagement",
374
+ "campaign_name": "Drumeo - Inactive Members (for 3 days) - Re-engagement",
375
+ "brand": "drumeo",
376
+
377
+ # Campaign-wide instructions (optional)
378
+ "campaign_instructions": "Keep messages encouraging and upbeat",
379
+
380
+ "1": { # Stage 1 configuration
381
+ "stage": 1,
382
+ "segment_info": "Students who haven't practiced...",
383
+ "recsys_contents": ["workout", "course", "quick_tips"],
384
+ "model": "gemini-2.5-flash-lite",
385
+
386
+ # Stage-specific instructions (optional)
387
+ "instructions": "Focus on the recommended content",
388
+
389
+ # Specific content promotion (optional)
390
+ "specific_content_id": None, # Set to content_id to force specific content for all users
391
+ ...
392
+ },
393
+ # Stages 2-11...
394
+ }
395
+ }
396
+ ```
397
+
398
+ ### Configuration Features
399
+
400
+ #### Campaign-Wide Instructions
401
+ Apply instructions to all stages of a campaign:
402
+ ```python
403
+ "campaign_instructions": "Keep the tone friendly and encouraging"
404
+ ```
405
+
406
+ #### Per-Message Instructions
407
+ Add stage-specific guidance:
408
+ ```python
409
+ "1": {
410
+ "stage": 1,
411
+ "instructions": "Emphasize the recommended content",
412
+ ...
413
+ }
414
+ ```
415
+
416
+ #### Specific Content Promotion
417
+ Force a specific content for all users in a stage (e.g., for marketing campaigns):
418
+ ```python
419
+ "2": {
420
+ "stage": 2,
421
+ "involve_recsys_result": True,
422
+ "specific_content_id": 12345, # All users get this content
423
+ "instructions": "Promote this featured content",
424
+ ...
425
+ }
426
+ ```
427
+
428
+ When `specific_content_id` is set:
429
+ - Overrides AI recommendations completely
430
+ - All users receive the same content at that stage
431
+ - No LLM tokens used for content selection (cost savings)
432
+ - System validates content exists before generating messages
433
+ - Set to `None` (default) for regular personalized recommendations
434
+
435
+ Both instruction types are automatically injected into LLM prompts after the user profile section, providing contextual guidance for message generation.
436
+
437
+ ### Loading Configurations
438
+
439
+ ```python
440
+ from configs.config_loader import get_system_config, get_campaign_config
441
+
442
+ # Load system configuration
443
+ system_config = get_system_config()
444
+
445
+ # Load campaign configuration
446
+ campaign = get_campaign_config("drumeo", "re_engagement", test_mode=False)
447
+
448
+ # Access instructions
449
+ campaign_instructions = campaign.get("campaign_instructions")
450
+ stage_instructions = campaign["1"].get("instructions")
451
+ ```
452
+
453
+ ### Helper Functions
454
+
455
+ - `get_system_config()`: Returns system-wide settings
456
+ - `get_campaign_config(brand, campaign_type, test_mode)`: Retrieves brand-specific campaign configuration
457
+ - `get_all_brands()`: Returns list of available brands
458
+ - `get_campaign_types(brand)`: Returns available campaign types for a brand
459
+
460
+ When `test_mode=True`, the system automatically uses test campaign names and test staff data.
461
+
462
+ ### Adding New Configurations
463
+
464
+ See `configs/README.md` for detailed instructions on:
465
+ - Adding new brands
466
+ - Adding new campaign types
467
+ - Modifying system settings
468
+ - Adding campaign or stage-specific instructions
469
+
470
+ ## 🔮 Future Enhancements
471
+
472
+ - A/B testing framework
473
+ - Advanced analytics dashboard
474
+ - Real-time personalization
475
+ - Multi-language support
476
+ - Enhanced recommendation algorithms
477
+ - Integration with additional LLM providers
478
+
479
+ ## 📄 License
480
+
481
+ This project is proprietary to Musora Media Inc. and is not licensed for public use.
482
+
483
+ ## 🤝 Contributing
484
+
485
+ Reach out to [email protected] for questions or contributions.
486
+
487
+ ---
488
+
489
+ **Built with ❤️ for music education by the Musora team**
ai_messaging_system_v2/UI_MODE_GUIDE.md ADDED
@@ -0,0 +1,495 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # UI Mode Guide
2
+
3
+ This guide explains how to use the **UI Mode** feature of the AI Messaging System. UI Mode allows you to generate personalized messages locally without storing results in Snowflake, making it perfect for UI applications and testing.
4
+
5
+ ## Overview
6
+
7
+ UI Mode provides:
8
+ - **Local CSV Storage**: Messages saved to `ai_messaging_system_v2/Data/ui_output/messages.csv`
9
+ - **Multi-Stage Support**: Single CSV file that grows with each stage (like Snowflake)
10
+ - **Full Control**: Customize all campaign parameters dynamically
11
+ - **Emoji Support**: UTF-8 encoding ensures proper emoji display
12
+ - **No Snowflake Writes**: Everything stays local for UI visualization
13
+
14
+ ## Key Differences from Production/Test Modes
15
+
16
+ | Feature | Production/Test Mode | UI Mode |
17
+ |---------|---------------------|---------|
18
+ | Data Storage | Snowflake database | Local CSV files |
19
+ | Previous Stage Data | Read from Snowflake | Read from local CSV |
20
+ | Output Location | `MESSAGING_SYSTEM_V2.GENERATED_DATA` | `Data/ui_output/messages.csv` |
21
+ | Use Case | Production campaigns | UI testing & development |
22
+
23
+ ## Quick Start
24
+
25
+ ### 1. Test UI Mode with generate_message_parallel.py
26
+
27
+ For testing purposes before UI integration:
28
+
29
+ ```python
30
+ from ai_messaging_system_v2.generate_message_parallel import generate_messages_parallel
31
+ from ai_messaging_system_v2.Messaging_system.Permes import Permes
32
+
33
+ # Clear previous UI output
34
+ Permes.clear_ui_output()
35
+
36
+ # Generate messages for stage 1 in UI mode
37
+ results = generate_messages_parallel(
38
+ message_number=1,
39
+ test_mode=False,
40
+ run_for_all_messages=False,
41
+ brand="drumeo",
42
+ campaign_type="re_engagement",
43
+ chunk_size=1000,
44
+ max_workers=5,
45
+ mode="ui" # Enable UI mode
46
+ )
47
+
48
+ # Output will be in: Data/ui_output/messages.csv
49
+ ```
50
+
51
+ ### 2. Use UI Mode Directly in Your UI Tool
52
+
53
+ When integrating with your UI tool, call `create_personalize_messages` directly:
54
+
55
+ ```python
56
+ from ai_messaging_system_v2.Messaging_system.Permes import Permes
57
+ from ai_messaging_system_v2.configs.config_loader import get_system_config
58
+ from snowflake.snowpark import Session
59
+ import pandas as pd
60
+
61
+ # Initialize
62
+ permes = Permes()
63
+ system_config = get_system_config()
64
+
65
+ # Clear previous output (optional, for fresh runs)
66
+ Permes.clear_ui_output()
67
+
68
+ # Prepare your users DataFrame
69
+ users = pd.DataFrame({
70
+ 'user_id': [123, 456, 789],
71
+     # add an 'email' column instead if you use identifier_column="email"
72
+ })
73
+
74
+ # Connect to Snowflake (still needed for user data fetching)
75
+ session = Session.builder.configs(connection_params).create()
76
+
77
+ # Generate messages with custom parameters
78
+ users_with_messages = permes.create_personalize_messages(
79
+ session=session,
80
+ users=users,
81
+ brand="drumeo",
82
+ config_file=system_config,
83
+ platform="push",
84
+ stage=1,
85
+ mode="ui", # Enable UI mode
86
+
87
+ # Customize these parameters from your UI
88
+ recsys_contents=["workout", "course"],
89
+ model="gemini-2.5-flash-lite",
90
+ identifier_column="user_id",
91
+ segment_info="Students who haven't practiced in 3 days",
92
+ sample_example="Header: Get back to practicing!\nMessage: Your drums are waiting for you!",
93
+ involve_recsys_result=True,
94
+ personalization=True,
95
+ campaign_name="My-Custom-Campaign",
96
+ campaign_instructions="Keep messages encouraging and positive",
97
+ per_message_instructions="Focus on the recommended content",
98
+ specific_content_id=None, # Set to content_id to force specific content for all users
99
+ ui_experiment_id=None # Optional: Set for A/B testing (e.g., "messages_a_drumeo_20260111_1756")
100
+ )
101
+
102
+ # Messages are now in: Data/ui_output/messages.csv
103
+ ```
104
+
105
+ ## Configuration
106
+
107
+ ### UI Test Config File
108
+
109
+ Location: `ai_messaging_system_v2/configs/test_data/ui_test_campaigns.py`
110
+
111
+ This file contains a test campaign configuration that you can modify for testing:
112
+
113
+ ```python
114
+ UI_TEST_CAMPAIGNS = {
115
+ "re_engagement": {
116
+ "campaign_view": "drumeo_re_engagement",
117
+ "campaign_name": "UI-Test-Campaign-Re-engagement",
118
+ "brand": "drumeo",
119
+ "campaign_instructions": "Keep messages encouraging...",
120
+
121
+ "1": {
122
+ "stage": 1,
123
+ "segment_info": "Students who haven't practiced...",
124
+ "recsys_contents": ["workout", "course", "quick_tips"],
125
+ "involve_recsys_result": True,
126
+ "personalization": True,
127
+ "model": "gemini-2.5-flash-lite",
128
+ "instructions": "Focus on recommended content...",
129
+ "specific_content_id": None # Set to content_id to force specific content
130
+ },
131
+ # Add more stages as needed
132
+ }
133
+ }
134
+ ```
135
+
136
+ ## Multi-Stage Campaigns
137
+
138
+ UI Mode supports multi-stage campaigns with a single CSV file that grows:
139
+
140
+ ```python
141
+ # Stage 1: Initial messages
142
+ Permes.clear_ui_output() # Clear for new campaign
143
+ generate_messages_parallel(message_number=1, mode="ui", ...)
144
+
145
+ # Stage 2: Follow-up messages (reads from stage 1)
146
+ generate_messages_parallel(message_number=2, mode="ui", ...)
147
+
148
+ # Stage 3: Another follow-up (reads from stages 1 & 2)
149
+ generate_messages_parallel(message_number=3, mode="ui", ...)
150
+
151
+ # All stages are in the same CSV file with a "stage" column
152
+ ```
153
+
154
+ ## A/B Testing Support
155
+
156
+ UI Mode fully supports A/B testing with separate file tracking for each experiment:
157
+
158
+ ```python
159
+ from ai_messaging_system_v2.Messaging_system.Permes import Permes
160
+ import pandas as pd
161
+
162
+ permes = Permes()
163
+ system_config = get_system_config()
164
+
165
+ # Shared configuration
166
+ users = pd.DataFrame({'user_id': [123, 456, 789]})
167
+ brand = "drumeo"
168
+ campaign_name = "AB-Test-Campaign"
169
+ session = Session.builder.configs(connection_params).create()
170
+
171
+ # Generate Experiment A (Stage 1)
172
+ users_a = permes.create_personalize_messages(
173
+ session=session,
174
+ users=users,
175
+ brand=brand,
176
+ config_file=system_config,
177
+ stage=1,
178
+ mode="ui",
179
+ campaign_name=campaign_name,
180
+ model="gemini-2.5-flash-lite",
181
+ ui_experiment_id="messages_a_drumeo_20260111_1756" # Unique ID for experiment A
182
+ )
183
+ # Saves to: Data/ui_output/messages_a_drumeo_20260111_1756.csv
184
+
185
+ # Generate Experiment B (Stage 1)
186
+ users_b = permes.create_personalize_messages(
187
+ session=session,
188
+ users=users,
189
+ brand=brand,
190
+ config_file=system_config,
191
+ stage=1,
192
+ mode="ui",
193
+ campaign_name=campaign_name,
194
+ model="gpt-4o-mini", # Different model for B
195
+ ui_experiment_id="messages_b_drumeo_20260111_1756" # Unique ID for experiment B
196
+ )
197
+ # Saves to: Data/ui_output/messages_b_drumeo_20260111_1756.csv
198
+
199
+ # Generate Follow-up Stage 2 for Experiment A
200
+ users_a_stage2 = permes.create_personalize_messages(
201
+ session=session,
202
+ users=users,
203
+ brand=brand,
204
+ config_file=system_config,
205
+ stage=2,
206
+ mode="ui",
207
+ campaign_name=campaign_name,
208
+ ui_experiment_id="messages_a_drumeo_20260111_1756" # Same ID to append to A's file
209
+ )
210
+ # Appends to: Data/ui_output/messages_a_drumeo_20260111_1756.csv
211
+
212
+ # Generate Follow-up Stage 2 for Experiment B
213
+ users_b_stage2 = permes.create_personalize_messages(
214
+ session=session,
215
+ users=users,
216
+ brand=brand,
217
+ config_file=system_config,
218
+ stage=2,
219
+ mode="ui",
220
+ campaign_name=campaign_name,
221
+ ui_experiment_id="messages_b_drumeo_20260111_1756" # Same ID to append to B's file
222
+ )
223
+ # Appends to: Data/ui_output/messages_b_drumeo_20260111_1756.csv
224
+ ```
225
+
226
+ **Key Points for A/B Testing:**
227
+ - Use `ui_experiment_id` parameter to specify unique filenames for each experiment
228
+ - Use the same `ui_experiment_id` across all stages of the same experiment
229
+ - Each experiment gets its own CSV file that grows with each stage
230
+ - Without `ui_experiment_id`, defaults to `messages.csv`
231
+
232
+ ## Output Format
233
+
234
+ ### messages.csv
235
+
236
+ All columns from the Snowflake schema, including:
237
+ - `user_id`, `email`, `first_name`
238
+ - `message`, `header` (contains JSON with header/message)
239
+ - `recommendation`, `recommendation_info`
240
+ - `stage`, `campaign_name`, `timestamp`
241
+ - `brand`, `platform`, `permission`
242
+ - All user profile fields
243
+
244
+ Example:
245
+ ```csv
246
+ user_id,email,first_name,message,recommendation,stage,campaign_name,...
247
+ 123,[email protected],John,"{""1"":{""header"":""Hi John 👋"",...}}",workout_123,1,UI-Test-Campaign,...
248
+ ```
249
+
250
+ ### message_cost.csv
251
+
252
+ Cost tracking information:
253
+ - `brand`, `campaign_name`, `stage`
254
+ - `model`, `number_of_messages`
255
+ - `total_prompt_tokens`, `total_completion_tokens`
256
+ - `total_cost`, `timestamp`
257
+
258
+ ## Helper Functions
259
+
260
+ ### Clear UI Output
261
+
262
+ ```python
263
+ from ai_messaging_system_v2.Messaging_system.Permes import Permes
264
+
265
+ # Clear all CSV files in UI output directory
266
+ Permes.clear_ui_output()
267
+ ```
268
+
269
+ ### Get UI Output Path
270
+
271
+ ```python
272
+ from ai_messaging_system_v2.Messaging_system.Permes import Permes
273
+
274
+ # Get the path to UI output directory
275
+ output_path = Permes.get_ui_output_path()
276
+ print(f"UI output directory: {output_path}")
277
+ ```
278
+
279
+ ## Parameters You Can Control from UI
280
+
281
+ When calling `create_personalize_messages` from your UI tool, you have full control over:
282
+
283
+ 1. **Campaign Configuration**:
284
+ - `campaign_name`: Custom campaign identifier
285
+ - `campaign_instructions`: Campaign-wide instructions (optional)
286
+ - `per_message_instructions`: Stage-specific instructions (optional)
287
+
288
+ 2. **Message Generation**:
289
+ - `stage`: Message number (1, 2, 3, ...)
290
+ - `model`: LLM model to use ("gemini-2.5-flash-lite", "gpt-5-nano", etc.)
291
+ - `sample_example`: Example message format
292
+ - `segment_info`: Description of user segment
293
+
294
+ 3. **Personalization**:
295
+ - `personalization`: Enable/disable personalization
296
+ - `involve_recsys_result`: Include content recommendations
297
+ - `recsys_contents`: Types of content to recommend ["workout", "course", "quick_tips", "song"]
298
+ - `specific_content_id`: Force specific content for all users (overrides AI recommendations)
299
+
300
+ 4. **Users**:
301
+ - `users`: DataFrame with user IDs or emails
302
+ - `identifier_column`: "user_id" or "email"
303
+
304
+ ## Promoting Specific Content
305
+
306
+ You can force all users to receive the same content recommendation using `specific_content_id`:
307
+
308
+ ```python
309
+ # Example: Promote a specific workout for all users
310
+ users_with_messages = permes.create_personalize_messages(
311
+ session=session,
312
+ users=users,
313
+ brand="drumeo",
314
+ config_file=system_config,
315
+ stage=2,
316
+ mode="ui",
317
+
318
+ # Force all users to receive workout ID 12345
319
+ involve_recsys_result=True, # Must be True to recommend content
320
+ specific_content_id=12345, # All users get this content
321
+
322
+ # Optional: Customize message to emphasize the featured content
323
+ per_message_instructions="Emphasize this featured workout as a special opportunity",
324
+
325
+ # Other parameters...
326
+ personalization=True,
327
+ campaign_name="Featured-Workout-Campaign",
328
+ )
329
+ ```
330
+
331
+ **How it works:**
332
+ - When `specific_content_id` is set, AI recommendations are completely bypassed
333
+ - All users receive the same content, but messages remain personalized around it
334
+ - System validates that the content exists in your brand's database
335
+ - No LLM tokens used for content selection (faster and cheaper)
336
+ - Raises an error if content_id not found (prevents silent failures)
337
+
338
+ **Use cases:**
339
+ - Marketing campaigns for specific courses or workouts
340
+ - Feature new content releases to all users
341
+ - A/B testing specific content performance
342
+ - Seasonal or event-based promotions
343
+
344
+ ## Best Practices
345
+
346
+ ### 1. Clear Output Between Campaigns
347
+ ```python
348
+ # When starting a new campaign with different parameters
349
+ Permes.clear_ui_output()
350
+ ```
351
+
352
+ ### 2. Check Output Files
353
+ ```python
354
+ import pandas as pd
355
+ from ai_messaging_system_v2.Messaging_system.Permes import Permes
356
+
357
+ # Read generated messages
358
+ output_file = Permes.get_ui_output_path() / "messages.csv"
359
+ messages = pd.read_csv(output_file, encoding='utf-8-sig')
360
+ print(f"Generated {len(messages)} messages")
361
+ ```
362
+
363
+ ### 3. Handle Emojis Properly
364
+ The system uses UTF-8-SIG encoding (UTF-8 with BOM) to ensure proper emoji display. When reading CSV files in your UI:
365
+
366
+ ```python
367
+ # Always use UTF-8-SIG encoding when reading to properly handle emojis
368
+ messages = pd.read_csv(file_path, encoding='utf-8-sig')
369
+
370
+ # Or in JavaScript/TypeScript
371
+ const messages = await fs.readFile(filePath, 'utf-8');
372
+ ```
373
+
374
+ ### 4. Testing Different Configurations
375
+ Modify `configs/test_data/ui_test_campaigns.py` to test different:
376
+ - Number of stages
377
+ - Instructions (campaign-wide and per-stage)
378
+ - Models
379
+ - Content types
380
+ - Sample examples
381
+ - Specific content promotion (`specific_content_id`)
382
+
383
+ ## Troubleshooting
384
+
385
+ ### Issue: "No previous message data found"
386
+ This is **expected** for stage 1 or when running the first stage of a new campaign. Previous stage data is only needed for stages 2+.
387
+
388
+ ### Issue: Emojis not displaying correctly
389
+ Ensure you're reading CSV files with UTF-8-SIG encoding (UTF-8 with BOM):
390
+ ```python
391
+ pd.read_csv(file_path, encoding='utf-8-sig')
392
+ ```
393
+
394
+ ### Issue: CSV file not found
395
+ Make sure the output directory exists:
396
+ ```python
397
+ from ai_messaging_system_v2.Messaging_system.Permes import Permes
398
+ print(Permes.get_ui_output_path())
399
+ ```
400
+
401
+ ### Issue: Old data in CSV
402
+ Clear the output directory before starting a new campaign:
403
+ ```python
404
+ Permes.clear_ui_output()
405
+ ```
406
+
407
+ ### Issue: "specific_content_id X not found in content database"
408
+ This error means the content_id doesn't exist for your brand:
409
+ - Verify the content_id exists in your Snowflake content table
410
+ - Ensure you're using the correct content_id for the brand
411
+ - Check that the content is active and not deleted
412
+ - Content IDs are brand-specific (Drumeo content ≠ Pianote content)
413
+
414
+ ## Integration Example
415
+
416
+ Here's a complete example of how your UI might integrate with the system:
417
+
418
+ ```python
419
+ from ai_messaging_system_v2.Messaging_system.Permes import Permes
420
+ from ai_messaging_system_v2.configs.config_loader import get_system_config
421
+ import pandas as pd
422
+
423
+ class MessageGeneratorUI:
424
+ def __init__(self, session):
425
+ self.permes = Permes()
426
+ self.system_config = get_system_config()
427
+ self.session = session
428
+
429
+ def start_new_campaign(self):
430
+ """Clear output when starting a new campaign"""
431
+ Permes.clear_ui_output()
432
+
433
+ def generate_stage(self, stage, users_df, ui_params):
434
+ """Generate messages for a specific stage with UI parameters"""
435
+
436
+ messages = self.permes.create_personalize_messages(
437
+ session=self.session,
438
+ users=users_df,
439
+ brand=ui_params['brand'],
440
+ config_file=self.system_config,
441
+ stage=stage,
442
+ mode="ui",
443
+
444
+ # Parameters from UI form
445
+ campaign_name=ui_params['campaign_name'],
446
+ campaign_instructions=ui_params.get('campaign_instructions'),
447
+ per_message_instructions=ui_params.get('stage_instructions'),
448
+ model=ui_params['model'],
449
+ recsys_contents=ui_params['content_types'],
450
+ sample_example=ui_params['sample_example'],
451
+ segment_info=ui_params['segment_description'],
452
+ involve_recsys_result=ui_params['include_recommendations'],
453
+ personalization=ui_params['enable_personalization'],
454
+ specific_content_id=ui_params.get('specific_content_id') # Optional: force specific content
455
+ )
456
+
457
+ return messages
458
+
459
+ def get_results(self):
460
+ """Read generated messages from CSV"""
461
+ output_file = Permes.get_ui_output_path() / "messages.csv"
462
+ return pd.read_csv(output_file, encoding='utf-8-sig')
463
+
464
+ # Usage in your UI
465
+ ui = MessageGeneratorUI(session)
466
+ ui.start_new_campaign()
467
+
468
+ # Generate stage 1
469
+ stage1_messages = ui.generate_stage(
470
+ stage=1,
471
+ users_df=selected_users,
472
+ ui_params={
473
+ 'brand': 'drumeo',
474
+ 'campaign_name': 'My Custom Campaign',
475
+ 'model': 'gemini-2.5-flash-lite',
476
+ 'content_types': ['workout', 'course'],
477
+ 'sample_example': 'Header: Hi!\nMessage: Check this out!',
478
+ 'segment_description': 'Inactive users',
479
+ 'include_recommendations': True,
480
+ 'enable_personalization': True,
481
+ 'campaign_instructions': 'Be encouraging',
482
+ 'stage_instructions': 'Focus on content',
483
+ 'specific_content_id': None # Set to content_id to force specific content for all users
484
+ }
485
+ )
486
+
487
+ # Display in UI
488
+ results = ui.get_results()
489
+ ```
490
+
491
+ ## Summary
492
+
493
+ UI Mode provides a clean, local alternative to Snowflake storage for UI applications. It maintains the same workflow and data structure as production mode, but stores everything locally in CSV format with proper UTF-8 encoding for emoji support.
494
+
495
+ For questions or issues, contact: [email protected]
ai_messaging_system_v2/configs/README.md ADDED
@@ -0,0 +1,363 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Configuration System Documentation
2
+
3
+ This directory contains the modular configuration system for the AI Messaging Platform.
4
+
5
+ ## 📁 Directory Structure
6
+
7
+ ```
8
+ configs/
9
+ ├── system/
10
+ │ ├── __init__.py
11
+ │ └── system_config.py # System-wide settings
12
+ ├── singeo/
13
+ │ ├── __init__.py
14
+ │ └── campaigns.py # Singeo brand campaigns
15
+ ├── drumeo/
16
+ │ ├── __init__.py
17
+ │ └── campaigns.py # Drumeo brand campaigns
18
+ ├── guitareo/
19
+ │ ├── __init__.py
20
+ │ └── campaigns.py # Guitareo brand campaigns
21
+ ├── pianote/
22
+ │ ├── __init__.py
23
+ │ └── campaigns.py # Pianote brand campaigns
24
+ ├── test_data/
25
+ │ ├── __init__.py
26
+ │ └── test_config.py # Test campaign data
27
+ ├── config_loader.py # Main config loader
28
+ └── README.md # This file
29
+ ```
30
+
31
+ ## 🚀 Quick Start
32
+
33
+ ### Loading System Configuration
34
+
35
+ ```python
36
+ from configs.config_loader import get_system_config
37
+
38
+ system_config = get_system_config()
39
+ print(system_config['header_limit']) # 30
40
+ print(system_config['message_limit']) # 110
41
+ ```
42
+
43
+ ### Loading Campaign Configuration
44
+
45
+ ```python
46
+ from configs.config_loader import get_campaign_config
47
+
48
+ # Production mode
49
+ campaign = get_campaign_config("drumeo", "re_engagement", test_mode=False)
50
+
51
+ # Test mode (uses test campaign name)
52
+ campaign = get_campaign_config("drumeo", "re_engagement", test_mode=True)
53
+
54
+ # Access stage configuration
55
+ stage_1_config = campaign["1"]
56
+ print(stage_1_config["model"]) # gemini-2.5-flash-lite
57
+
58
+ # Access campaign-wide instructions
59
+ campaign_instructions = campaign.get("campaign_instructions")
60
+
61
+ # Access stage-specific instructions
62
+ stage_instructions = stage_1_config.get("instructions")
63
+ ```
64
+
65
+ ### Helper Functions
66
+
67
+ ```python
68
+ from configs.config_loader import get_all_brands, get_campaign_types
69
+
70
+ # Get all available brands
71
+ brands = get_all_brands()
72
+ # ['singeo', 'drumeo', 'guitareo', 'pianote']
73
+
74
+ # Get campaign types for a brand
75
+ campaign_types = get_campaign_types("drumeo")
76
+ # ['re_engagement']
77
+ ```
78
+
79
+ ## ✨ Configuration Features
80
+
81
+ ### 1. System Configuration
82
+
83
+ Located in `configs/system/system_config.py`, contains:
84
+ - User profile features to extract
85
+ - Message character limits (header/message)
86
+ - LLM model configurations (OpenAI, Google, Claude)
87
+ - AI jargon filters
88
+ - Banned content IDs
89
+ - Model failure thresholds
90
+
91
+ ### 2. Brand Campaign Configuration
92
+
93
+ Each brand has its own `campaigns.py` file containing:
94
+ - Campaign metadata (view name, campaign name, brand)
95
+ - Campaign-wide instructions (optional)
96
+ - Stage configurations (1-11) with:
97
+ - Stage-specific settings
98
+ - Per-message instructions (optional)
99
+ - Model selection
100
+ - Personalization settings
101
+ - Content recommendation types
102
+
103
+ ### 3. Campaign-Wide Instructions
104
+
105
+ Apply instructions to ALL stages of a campaign:
106
+
107
+ ```python
108
+ # In configs/{brand}/campaigns.py
109
+ CAMPAIGNS = {
110
+ "re_engagement": {
111
+ "campaign_view": "drumeo_re_engagement",
112
+ "campaign_name": "Drumeo - Inactive Members (for 3 days) - Re-engagement",
113
+ "brand": "drumeo",
114
+
115
+ # Campaign-wide instructions applied to all stages
116
+ "campaign_instructions": "Keep the tone upbeat and encouraging. Focus on getting users back to practicing.",
117
+
118
+ "1": {
119
+ "stage": 1,
120
+ # ... stage config
121
+ }
122
+ }
123
+ }
124
+ ```
125
+
126
+ ### 4. Per-Message (Stage-Specific) Instructions
127
+
128
+ Add instructions for individual stages:
129
+
130
+ ```python
131
+ "1": {
132
+ "stage": 1,
133
+ "segment_info": "Students who haven't practiced...",
134
+ "model": "gemini-2.5-flash-lite",
135
+
136
+ # Stage-specific instructions
137
+ "instructions": "For stage 1, emphasize the recommended content and make it feel fresh.",
138
+ }
139
+ ```
140
+
141
+ ### 5. Specific Content Promotion
142
+
143
+ Force a specific content for all users in a stage (overrides AI recommendations):
144
+
145
+ ```python
146
+ "1": {
147
+ "stage": 1,
148
+ "segment_info": "Students who haven't practiced...",
149
+ "involve_recsys_result": True, # Must be True to recommend content
150
+
151
+ # NEW: Force specific content for ALL users at this stage
152
+ "specific_content_id": 12345, # Set to content_id, or None for regular AI recommendations
153
+
154
+ "model": "gemini-2.5-flash-lite",
155
+ "instructions": "Emphasize this special featured content",
156
+ }
157
+ ```
158
+
159
+ **How it works:**
160
+ - When `specific_content_id` is set, ALL users receive the same content recommendation
161
+ - Overrides the AI-powered recommendation system entirely
162
+ - No LLM tokens used for content selection (cost savings)
163
+ - System validates that content exists in database for the brand
164
+ - Raises error if content_id not found (no silent failures)
165
+ - Set to `None` (default) to use regular personalized recommendations
166
+
167
+ **Use cases:**
168
+ - Promote a specific course, workout, or song for a marketing campaign
169
+ - Feature new content to all users in a particular stage
170
+ - A/B test specific content performance
171
+ - Seasonal or event-based content promotion
172
+
173
+ **Priority Order:**
174
+ 1. If `specific_content_id` is set → Use that content for all users
175
+ 2. Else if `involve_recsys_result=True` → Use AI/random recommendations
176
+ 3. Else → Use Homepage recommender (redirect to "For You" page)
177
+
178
+ ### 6. Instruction Injection in Prompts
179
+
180
+ Both `campaign_instructions` and `per_message_instructions` are automatically injected into LLM prompts **after the user profile section**, providing contextual guidance while maintaining the prompt structure.
181
+
182
+ ## 📝 How to Add/Modify Configurations
183
+
184
+ ### Adding a New Brand
185
+
186
+ 1. Create directory: `configs/newbrand/`
187
+ 2. Add `__init__.py` (empty file)
188
+ 3. Create `campaigns.py`:
189
+
190
+ ```python
191
+ """
192
+ NewBrand Campaign Configurations
193
+ """
194
+
195
+ CAMPAIGNS = {
196
+ "re_engagement": {
197
+ "campaign_view": "newbrand_re_engagement",
198
+ "campaign_name": "NewBrand - Inactive Members - Re-engagement",
199
+ "brand": "newbrand",
200
+ "campaign_instructions": None,
201
+ "1": {
202
+ # ... stage configs
203
+ }
204
+ }
205
+ }
206
+
207
+ def get_campaigns():
208
+ return CAMPAIGNS
209
+ ```
210
+
211
+ 4. Update `configs/config_loader.py`:
212
+
213
+ ```python
214
+ from .newbrand.campaigns import get_campaigns as get_newbrand_campaigns
215
+
216
+ BRAND_CAMPAIGNS = {
217
+ # ... existing brands
218
+ "newbrand": get_newbrand_campaigns(),
219
+ }
220
+ ```
221
+
222
+ ### Adding a New Campaign Type
223
+
224
+ Edit `configs/{brand}/campaigns.py`:
225
+
226
+ ```python
227
+ CAMPAIGNS = {
228
+ "re_engagement": {
229
+ # ... existing config
230
+ },
231
+ "new_campaign_type": {
232
+ "campaign_view": "brand_new_campaign",
233
+ "campaign_name": "Brand - New Campaign",
234
+ "brand": "brand",
235
+ "campaign_instructions": "Optional campaign-wide instructions",
236
+ "1": {
237
+ "stage": 1,
238
+ "segment_info": "...",
239
+ "instructions": "Optional stage-specific instructions",
240
+ "specific_content_id": None, # Optional: Force specific content for all users
241
+ # ... other stage settings
242
+ }
243
+ }
244
+ }
245
+ ```
246
+
247
+ ### Modifying System Settings
248
+
249
+ Edit `configs/system/system_config.py`:
250
+
251
+ ```python
252
+ SYSTEM_CONFIG = {
253
+ "header_limit": 30, # Modify as needed
254
+ "message_limit": 110,
255
+ "openai_models": ["gpt-4o-mini", "gpt-5-nano"],
256
+ # ... other settings
257
+ }
258
+ ```
259
+
260
+ ### Adding or Modifying Instructions
261
+
262
+ Edit the brand's `campaigns.py`:
263
+
264
+ ```python
265
+ CAMPAIGNS = {
266
+ "re_engagement": {
267
+ # Add or modify campaign-wide instructions
268
+ "campaign_instructions": "Always be encouraging and avoid negative phrasing",
269
+
270
+ "1": {
271
+ # Add or modify stage-specific instructions
272
+ "instructions": "Focus heavily on the recommended content",
273
+ # ... rest of config
274
+ },
275
+ "2": {
276
+ "instructions": "Emphasize the time since last login",
277
+ # ... rest of config
278
+ }
279
+ }
280
+ }
281
+ ```
282
+
283
+ ## 🔄 Dynamic Chunking System
284
+
285
+ The parallel processing system includes smart chunking that maximizes worker utilization:
286
+
287
+ **Rules:**
288
+ - Minimum chunk size: 20 users
289
+ - Maximum chunk size: 1000 users
290
+ - Users are distributed equally among available workers
291
+
292
+ **Examples:**
293
+ - 1000 users, 5 workers → 5 chunks of 200 each (uses all workers)
294
+ - 150 users, 5 workers → 5 chunks of 30 each (uses all workers)
295
+ - 50 users, 5 workers → 2 chunks of 25 each (uses 2 workers; the 20-user minimum caps the chunk count)
296
+ - 15 users, 5 workers → 1 chunk of 15 (below minimum, single chunk)
297
+
298
+ This is handled automatically in `generate_message_parallel.py` via the `split_into_chunks()` function.
299
+
300
+ ## 🧪 Testing
301
+
302
+ Enable test mode to use test data:
303
+
304
+ ```python
305
+ from ai_messaging_system_v2.generate_message_parallel import generate_messages_parallel
306
+
307
+ results = generate_messages_parallel(
308
+ message_number=1,
309
+ test_mode=True, # Uses test campaign name and staff data
310
+ brand="drumeo",
311
+ campaign_type="re_engagement",
312
+ chunk_size=1000, # Maximum, actual size determined dynamically
313
+ max_workers=5
314
+ )
315
+ ```
316
+
317
+ Test data configuration: `configs/test_data/test_config.py`
318
+
319
+ ## 🏗️ System Architecture
320
+
321
+ ### Configuration Flow
322
+
323
+ 1. **Load System Config**: `get_system_config()` returns system-wide settings
324
+ 2. **Load Campaign Config**: `get_campaign_config(brand, type, test_mode)` retrieves brand+campaign config
325
+ 3. **Extract Stage Data**: Campaign config contains all 11 stages with metadata
326
+ 4. **Pass Instructions**: Both campaign and stage instructions flow through:
327
+ - `generate_message_parallel.py` → `read_data()`
328
+ - `Permes.py` → `create_personalize_messages()`
329
+ - `CoreConfig.py` → stores as attributes
330
+ - `PromptGenerator.py` → `get_additional_instructions()` injects into prompts
331
+
332
+ ### Instruction Injection Flow
333
+
334
+ ```
335
+ Campaign Config
336
+
337
+ campaign_instructions (all stages) + per_message_instructions (specific stage)
338
+
339
+ Passed to Permes.create_personalize_messages()
340
+
341
+ Stored in CoreConfig
342
+
343
+ PromptGenerator.get_additional_instructions()
344
+
345
+ Injected after user profile section in prompt
346
+
347
+ Sent to LLM
348
+ ```
349
+
350
+ ## ✅ Benefits
351
+
352
+ 1. **Modularity**: Configurations separated by purpose (system/brand/test)
353
+ 2. **Scalability**: Easy to add new brands or campaigns
354
+ 3. **Maintainability**: Changes isolated to specific files
355
+ 4. **Extensibility**: New features can be added per campaign or stage
356
+ 5. **Clarity**: Each file serves a single, clear purpose
357
+ 6. **Flexibility**: Instructions can be campaign-wide or stage-specific
358
+
359
+ ## 📞 Support
360
+
361
+ For questions about the configuration system:
362
+ - Contact: [email protected]
363
+ - Main project documentation: `ai_messaging_system_v2/README.md`
Messaging_system/StoreLayer.py → ai_messaging_system_v2/configs/__init__.py RENAMED
File without changes
ai_messaging_system_v2/configs/config_loader.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Configuration Loader

This module serves as the main entry point for loading all configurations.
It consolidates system settings, brand campaigns, and test data from the
modular config structure.

Usage:
    from configs.config_loader import get_system_config, get_campaign_config

    # Get system configuration
    system_config = get_system_config()

    # Get brand campaign configuration
    campaign_config = get_campaign_config("drumeo", "re_engagement", test_mode=False)

Migration from embedded_configs.py:
    Old: from embedded_configs import SYSTEM_CONFIG, get_campaign_config
    New: from configs.config_loader import get_system_config, get_campaign_config
         SYSTEM_CONFIG = get_system_config()
"""

import copy

# Import system configuration (aliased with a leading underscore: the module
# exposes it through get_system_config() and the SYSTEM_CONFIG compat export).
from .system.system_config import SYSTEM_CONFIG as _SYSTEM_CONFIG

# Import brand campaign configurations (one campaigns.py per brand package)
from .singeo.campaigns import get_campaigns as get_singeo_campaigns
from .drumeo.campaigns import get_campaigns as get_drumeo_campaigns
from .guitareo.campaigns import get_campaigns as get_guitareo_campaigns
from .pianote.campaigns import get_campaigns as get_pianote_campaigns

# Import test data (test campaign name override + staff fixtures)
from .test_data.test_config import TEST_CAMPAIGN_NAME, TEST_STAFF_DATA
from .test_data.ui_test_campaigns import get_ui_test_campaigns


# Build the unified BRAND_CAMPAIGNS dictionary: brand name -> campaigns dict
BRAND_CAMPAIGNS = {
    "singeo": get_singeo_campaigns(),
    "drumeo": get_drumeo_campaigns(),
    "guitareo": get_guitareo_campaigns(),
    "pianote": get_pianote_campaigns(),
}

# UI test campaigns (for UI mode testing); keyed by campaign_type only,
# independent of brand.
UI_TEST_CAMPAIGNS = get_ui_test_campaigns()
49
+
50
+
51
def get_system_config():
    """
    Retrieve the system configuration.

    Returns:
        dict: Deep copy of the system configuration dictionary containing:
            - user_info_features: List of user profile features
            - interaction_features: List of interaction tracking features
            - header_limit/message_limit: Character limits
            - Model configurations (OpenAI, Google, Claude, etc.)
            - AI jargon filters
            - Banned content IDs

    Example:
        >>> config = get_system_config()
        >>> print(config['header_limit'])
        30
    """
    # Return a deep copy so callers cannot mutate the shared module-level
    # config in place -- consistent with get_campaign_config(), which also
    # copies before returning. The original object remains reachable through
    # the backward-compatibility SYSTEM_CONFIG export for code that needs it.
    return copy.deepcopy(_SYSTEM_CONFIG)
70
+
71
+
72
def get_campaign_config(brand, campaign_type="re_engagement", test_mode=False, mode="production"):
    """
    Retrieve campaign configuration for a specific brand.

    When test_mode=True, the campaign_name is automatically changed to TEST_CAMPAIGN_NAME.
    When mode="ui", uses UI test campaign configuration (brand-independent lookup).

    Args:
        brand (str): Brand name (e.g., "singeo", "guitareo", "pianote", "drumeo")
        campaign_type (str): Campaign type (default: "re_engagement")
        test_mode (bool): If True, replaces campaign_name with test campaign name
        mode (str): Operating mode - "production", "test", or "ui" (default: "production")

    Returns:
        dict: Campaign configuration (deep copy, so callers may mutate freely)
            Contains:
            - campaign_view: Snowflake view name
            - campaign_name: Campaign identifier
            - brand: Brand name
            - campaign_instructions: Optional campaign-wide instructions
            - stages 1-11: Individual stage configurations

    Raises:
        KeyError: If brand or campaign type not found

    Examples:
        >>> config = get_campaign_config("drumeo", "re_engagement", test_mode=True)
        >>> print(config["campaign_name"])
        'musora-staff-test-campaign'

        >>> config = get_campaign_config("drumeo", "re_engagement", mode="ui")
        >>> stage_1_config = config["1"]
        >>> print(stage_1_config["model"])
        'gemini-2.5-flash-lite'
    """
    # UI mode is brand-independent: campaigns are looked up by type only.
    if mode == "ui":
        if campaign_type in UI_TEST_CAMPAIGNS:
            # Deep copy so the shared UI test config is never mutated.
            return copy.deepcopy(UI_TEST_CAMPAIGNS[campaign_type])
        raise KeyError(
            f"Campaign type '{campaign_type}' not found in UI test campaigns. "
            f"Available types: {list(UI_TEST_CAMPAIGNS.keys())}"
        )

    # Production/Test mode: resolve brand first, then campaign type.
    brand_catalog = BRAND_CAMPAIGNS.get(brand)
    if brand_catalog is None:
        raise KeyError(
            f"Brand '{brand}' not found. Available brands: {list(BRAND_CAMPAIGNS.keys())}"
        )

    if campaign_type not in brand_catalog:
        raise KeyError(
            f"Campaign type '{campaign_type}' not found for brand '{brand}'. "
            f"Available types: {list(brand_catalog.keys())}"
        )

    # Deep copy so edits (including the test-mode rename below) never leak
    # back into the module-level BRAND_CAMPAIGNS registry.
    selected = copy.deepcopy(brand_catalog[campaign_type])
    if test_mode:
        selected["campaign_name"] = TEST_CAMPAIGN_NAME
    return selected
151
+
152
+
153
def get_all_brands():
    """
    Get list of all available brands.

    Returns:
        list: Brand names, in registration order.

    Example:
        >>> get_all_brands()
        ['singeo', 'drumeo', 'guitareo', 'pianote']
    """
    # Iterating a dict yields its keys, so list() alone is enough.
    return list(BRAND_CAMPAIGNS)
166
+
167
+
168
def get_campaign_types(brand):
    """
    Get available campaign types for a specific brand.

    Args:
        brand (str): Brand name

    Returns:
        list: List of campaign types available for the brand

    Raises:
        KeyError: If brand not found

    Example:
        >>> get_campaign_types("drumeo")
        ['re_engagement']
    """
    try:
        brand_campaigns = BRAND_CAMPAIGNS[brand]
    except KeyError:
        # Re-raise with a helpful message listing the valid brands.
        raise KeyError(
            f"Brand '{brand}' not found. Available brands: {list(BRAND_CAMPAIGNS.keys())}"
        ) from None
    return list(brand_campaigns)
193
+
194
+
195
# Backward compatibility exports
# Callers that previously did `from embedded_configs import SYSTEM_CONFIG`
# can import the same name from this module. NOTE(review): this exposes the
# shared mutable dict directly; prefer get_system_config() in new code.
SYSTEM_CONFIG = _SYSTEM_CONFIG

# Public API of the module for `from configs.config_loader import *`.
__all__ = [
    'get_system_config',
    'get_campaign_config',
    'get_all_brands',
    'get_campaign_types',
    'SYSTEM_CONFIG',
    'BRAND_CAMPAIGNS',
    'UI_TEST_CAMPAIGNS',
    'TEST_CAMPAIGN_NAME',
    'TEST_STAFF_DATA',
]
ai_messaging_system_v2/configs/drumeo/__init__.py ADDED
File without changes