# antidrift/antidrift/evaluate.py

import csv
import keyring
import requests
import json
import antidrift.xwindow as xwindow
from antidrift.config import Config
from collections import defaultdict
from datetime import datetime, timedelta
from dataclasses import dataclass
from functools import lru_cache
from typing import List, Optional


@dataclass
class Datapoint:
    """One usable row of the window log CSV, as parsed by ``evaluate``."""
    # First CSV column, parsed with ``datetime.fromisoformat``.
    timestamp: datetime
    # Second CSV column; sent to the rater as "Title". Rows whose title is
    # empty are dropped while loading. Presumably the focused window's title.
    title: str
    # Third CSV column; sent to the rater as "Tool".
    tool: str
    # Fourth CSV column; sent to the rater as "Intention".
    intention: str


@dataclass
class Evaluation:
    """Rating of a single datapoint, built from the rater's JSON reply."""
    # Value of the reply's "level" key. The prompt asks for one of: deep work,
    # shallow work, good media, bad media, inappropriate (not validated here).
    level: str
    # Value of the reply's "reason" key: the rater's explanation.
    reason: str


def filter_today(datapoints: List[Datapoint]) -> List[Datapoint]:
    """Return the datapoints recorded on the current calendar day.

    The current day is taken once from ``datetime.now()`` and compared with
    the date part of each datapoint's timestamp. Input order is preserved and
    the input list is not modified.
    """
    current_date = datetime.now().date()
    kept = []
    for point in datapoints:
        if point.timestamp.date() == current_date:
            kept.append(point)
    return kept


def filter_last_hour(datapoints: List[Datapoint]) -> List[Datapoint]:
    """Return the datapoints recorded within the last hour.

    The cutoff is ``datetime.now()`` minus one hour; a datapoint is kept when
    its timestamp is at or after that cutoff. Input order is preserved and the
    input list is not modified.
    """
    # The window is a full hour, matching the function's name (the previous
    # 50-minute cutoff silently dropped entries that were 50-60 minutes old).
    cutoff = datetime.now() - timedelta(hours=1)
    return [d for d in datapoints if d.timestamp >= cutoff]


def evaluate(config: Config):
    """Print how much recent time was spent at each rating level.

    Reads the CSV log at ``config.window_log_file``, keeps the entries
    selected by ``filter_last_hour``, rates each one via
    ``evaluate_datapoint`` / ``parse_result`` and prints one line per level
    with the accumulated duration.

    The time between two consecutive datapoints is credited to the level of
    the earlier one. Consequently the final datapoint contributes no time,
    and an interval whose earlier datapoint could not be rated is not counted.
    """
    datapoints = filter_last_hour(_read_datapoints(config.window_log_file))

    durations = defaultdict(timedelta)
    prev_datapoint = None
    prev_evaluation = None
    for d in datapoints:
        result = evaluate_datapoint(d.title, d.tool, d.intention)
        # evaluate_datapoint returns None when the request failed; treat that
        # as "no evaluation" instead of handing None to the JSON parser.
        evaluation = parse_result(result) if result is not None else None

        if prev_datapoint is not None and prev_evaluation is not None:
            # Credit the elapsed time to the level of the earlier datapoint.
            durations[prev_evaluation.level] += d.timestamp - prev_datapoint.timestamp

        prev_datapoint = d
        prev_evaluation = evaluation

    for level, duration in durations.items():
        print(f"Level: {level}, Duration: {duration}")


def _read_datapoints(log_file) -> List[Datapoint]:
    """Parse the log CSV into datapoints, skipping blank/short rows and empty titles."""
    datapoints: List[Datapoint] = []
    # newline="" lets the csv module do its own newline handling, as the
    # csv documentation recommends for files passed to csv.reader.
    with open(log_file, "r", newline="") as file:
        for row in csv.reader(file):
            # csv.reader yields [] for blank lines; a row that does not have
            # exactly four fields cannot be unpacked into a Datapoint.
            if len(row) != 4:
                continue
            timestamp_str, title, tool, intention = row
            if title == "":
                continue
            timestamp = datetime.fromisoformat(timestamp_str)
            datapoints.append(Datapoint(timestamp, title, tool, intention))
    return datapoints


def parse_result(result: Optional[str]) -> Optional[Evaluation]:
    """Turn the rater's raw reply into an ``Evaluation``.

    Args:
        result: The reply text, expected to be a JSON object with the keys
            "level" and "reason"; may be None when no reply was obtained.

    Returns:
        The parsed ``Evaluation``, or None when ``result`` is None, is not
        valid JSON, is not a JSON object, or lacks one of the two keys.
    """
    # evaluate_datapoint returns None on request failure; None has no .strip().
    if result is None:
        return None
    try:
        content = json.loads(result.strip())
    except ValueError:
        return None
    # Valid JSON that is not an object (list, string, number) cannot be
    # indexed by key; reject it rather than raising TypeError.
    if not isinstance(content, dict):
        return None
    try:
        level = content["level"]
        reason = content["reason"]
    except KeyError:
        return None
    return Evaluation(level, reason)


@lru_cache
def evaluate_datapoint(title, tool, intention) -> Optional[str]:
    """Ask the OpenAI chat completions API to rate one datapoint.

    The API key is read from the system keyring (service "openai-api-key",
    user "felixm"). The prompt built by ``get_prompt`` is sent to the "gpt-4"
    model with a 10 second timeout.

    Return values are memoized per ``(title, tool, intention)`` by
    ``lru_cache``. This includes None results from failed requests, so a
    failed combination is not retried while it stays in the cache.

    Returns:
        The content of the first choice's message, or None when the request
        fails, the status code is not 200 or the response body does not have
        the expected shape. Each failure is reported via ``xwindow.notify``.
    """
    api_key = keyring.get_password("openai-api-key", "felixm")
    prompt = get_prompt(title, tool, intention)

    instruction = "You are productivity rater GPT and classify work sessions."
    messages = [
        {"role": "system", "content": instruction},
        {"role": "user", "content": prompt},
    ]
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    BASE_ENDPOINT = "https://api.openai.com/v1"
    body = {"model": "gpt-4", "messages": messages}

    try:
        r = requests.post(
            f"{BASE_ENDPOINT}/chat/completions", headers=headers, json=body, timeout=10
        )
    except requests.ConnectionError:
        xwindow.notify("Antidrift - GPT - Connection error")
        return None
    except requests.Timeout:
        xwindow.notify("Antidrift - GPT - Timeout")
        return None
    except requests.RequestException:
        # Any other transport-level failure raised by requests; keep the
        # function best-effort instead of letting it propagate to the caller.
        xwindow.notify("Antidrift - GPT - Request error")
        return None

    if r.status_code != 200:
        xwindow.notify(f"Antidrift - GPT - Response error status code {r.status_code}")
        return None

    try:
        # r.json() raises a ValueError subclass on an invalid body; the
        # lookups fail with KeyError/IndexError/TypeError on an unexpected shape.
        return r.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError):
        xwindow.notify("Antidrift - GPT - Malformed response")
        return None


def get_prompt(title: str, tool: str, intention: str) -> str:
    """Build the few-shot rating prompt for one datapoint.

    The prompt lists the allowed levels, asks for a JSON object with the
    attributes 'level' and 'reason', shows five worked examples and ends with
    the given intention, tool and title followed by an open "Response:".

    Args:
        title: Text inserted after "Title:".
        tool: Text inserted after "Tool:".
        intention: Text inserted after "Intention:".

    Returns:
        The complete prompt text.
    """
    # Level names in the examples must match the level list exactly: the
    # reply's level is used as an aggregation key, so a misspelt example
    # ("inapproriate") could split one level into two buckets.
    return f"""
            Rate how well that title and tool matches the intention.

            Use one of the following levels:

            deep work, shallow work, good media, bad media, inappropriate

            Return your response as JSON object with the attributes 'level' and 'reason'.

            Adult or other inappropriate NSFW content always scores 'inappropriate'.

            Examples:

            Intention: Work on coding.
            Tool: VS Code
            Title: main.py - GoalGuard - Code - OSS

            Response:
            {{
                "level": "deep work",
                "reason": "The user uses VS code to work on coding."
            }}

            Intention: Watch educational video.
            Tool: Firefox
            Title: World's hardest jigsaw puzzle - YouTube - Mozilla Firefox

            Response:
            {{
                "level": "good media",
                "reason": "The user does the desired activity, and it seems educational."
            }}

            Intention: no intention
            Tool: Firefox
            Title: Reddit - Mozilla Firefox

            Response:
            {{
                "level": "bad media",
                "reason": "The user does not have an intention and wastes time on reddit."
            }}

            Intention: Watch educational video.
            Tool: Firefox
            Title: 8tube.com - Mozilla Firefox Private Browsing

            Response:
            {{
                "level": "inappropriate",
                "reason": "The user consumes adult content."
            }}

            Intention: no intention
            Tool: Firefox
            Title: Amazing Marvin - Daily Tasks — Mozilla Firefox

            Response:
            {{
                "level": "shallow work",
                "reason": "The user works on their task list but does not engage in deep work."
            }}

            Intention: {intention}
            Tool: {tool}
            Title: {title}

            Response:
            """