
Use GPU for CLIP

The Code

import torch
import open_clip
import transformers
from multilingual_clip import pt_multilingual_clip, Config_MCLIP

DEVICE = 'cuda'

class CudaMultilingualCLIP(transformers.PreTrainedModel):
    """to support GPU on encoding text

    """
    config_class = Config_MCLIP.MCLIPConfig

    def __init__(self, config, *args, **kwargs):
        super().__init__(config, *args, **kwargs)
        self.transformer = transformers.AutoModel.from_pretrained(config.modelBase)
        self.LinearTransformation = torch.nn.Linear(in_features=config.transformerDimensions,
                                                    out_features=config.numDims)

    def forward(self, txt, tokenizer, device=DEVICE):
        txt_tok = tokenizer(txt, padding=True, return_tensors='pt').to(device)
        embs = self.transformer(**txt_tok)[0]
        att = txt_tok['attention_mask']
        embs = (embs * att.unsqueeze(2)).sum(dim=1) / att.sum(dim=1)[:, None]
        return self.LinearTransformation(embs)

    @classmethod
    def _load_state_dict_into_model(cls, model, state_dict, pretrained_model_name_or_path, _fast_init=True):
        model.load_state_dict(state_dict)
        return model, [], [], []

Usage

text_model_name = 'M-CLIP/XLM-Roberta-Large-Vit-B-16Plus'
text_model = CudaMultilingualCLIP.from_pretrained(text_model_name)
tokenizer = transformers.AutoTokenizer.from_pretrained(text_model_name)
text_model = text_model.to(DEVICE)

texts = ['a photo of a dog']  # any list of strings, in any supported language
with torch.no_grad():
    embeddings = text_model(texts, tokenizer)
    embeddings /= embeddings.norm(dim=-1, keepdim=True)
    embeddings = embeddings.squeeze(0)
embed = embeddings.to('cpu').numpy()
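The embeddings above cover only the text side. As a rough sketch of the image side, the M-CLIP model card pairs XLM-Roberta-Large-Vit-B-16Plus with OpenCLIP's ViT-B-16-plus-240 (laion400m_e32) weights; that pairing and the example image path are assumptions here, so double-check the model card before relying on the scores.

from PIL import Image

import open_clip
import torch

# assumed image tower for this text model (per the M-CLIP model card)
image_model, _, preprocess = open_clip.create_model_and_transforms(
    'ViT-B-16-plus-240', pretrained='laion400m_e32')
image_model = image_model.to(DEVICE).eval()

image = preprocess(Image.open('example.jpg')).unsqueeze(0).to(DEVICE)  # placeholder image
with torch.no_grad():
    image_emb = image_model.encode_image(image)
    image_emb /= image_emb.norm(dim=-1, keepdim=True)

# cosine similarity between the normalized text and image embeddings
similarity = torch.from_numpy(embed).to(DEVICE) @ image_emb.T
print(similarity)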

OpenCV Video

A simple tutorial for capturing video with the Raspberry Pi's camera.

1. Use crontab to run the script automatically on startup

crontab -e

2. Add this line to the end

@reboot python /home/pi/workspace/camera/capture_video.py &

3. The Code - capture_video.py

import argparse
import os
import time
from datetime import datetime
from pytz import timezone
import pytz

import cv2

dir_current = os.path.dirname(os.path.realpath(__file__))

def save_video(save_folder=os.path.join(dir_current, 'video'),
               target_fps=10,
               save_interval_mins=30):
    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)  # float
    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)  # float
    print(f'Current W: {width}, H: {height}')

    fps = cap.get(cv2.CAP_PROP_FPS)
    #cap.set(cv2.CAP_PROP_FPS, 10)
    date_format = '%m_%d_%Y_%H_%M_%S'

    os.makedirs(save_folder, exist_ok=True)
    time_started = time.time()
    date = datetime.now(tz=pytz.utc).astimezone(timezone('US/Pacific'))
    save_path = os.path.join(save_folder, f'{date.strftime(date_format)}.mp4')
    print(save_path)
    vid_writer = cv2.VideoWriter(
            save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height)))
    prev = 0
    try:
        while True:
            time_current = time.time()
            if time_current - time_started >= save_interval_mins*60:  # start a new file
                time_started = time_current
                # close the previous file before opening the next one
                vid_writer.release()
                date = datetime.now(tz=pytz.utc).astimezone(timezone('US/Pacific'))
                save_path = os.path.join(save_folder, f'{date.strftime(date_format)}.mp4')
                vid_writer = cv2.VideoWriter(
                    save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height)))
                print(save_path)

            time_elapsed = time_current - prev
            ret_val, frame = cap.read()
            if target_fps > 0 and time_elapsed < 1. / target_fps:
                # keep reading (and dropping) frames until enough time has
                # passed to hit the target frame rate
                continue
            prev = time_current
            if ret_val:
                #frame = cv2.rotate(frame, cv2.ROTATE_180)
                date = datetime.now(tz=pytz.utc).astimezone(timezone('US/Pacific'))
                frame = cv2.putText(frame,f'{date.strftime(date_format)}',
                                (10, 50), cv2.FONT_HERSHEY_SIMPLEX,
                                1, (255, 0, 0), 1, cv2.LINE_AA)
                vid_writer.write(frame)
            # waitKey only registers key presses when an OpenCV window is open
            # (e.g. via cv2.imshow); when run headless from cron it returns -1
            # and the loop runs until the process is killed
            key_pressed = cv2.waitKey(1)
            if key_pressed in (27, ord('q'), ord('Q')):
                break
    finally:
        # release the camera and writer whether the loop exits normally,
        # raises an error, or is interrupted (e.g. with Ctrl-C)
        print('exiting')
        vid_writer.release()
        cap.release()

if __name__ == "__main__":
    save_video(target_fps=10, save_interval_mins=30)
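To sanity check what was recorded, here is a small sketch (not part of the capture script) that lists the saved clips and reports their frame counts and rough durations; it assumes the default video folder used above.

import glob
import os

import cv2

video_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'video')
for path in sorted(glob.glob(os.path.join(video_dir, '*.mp4'))):
    cap = cv2.VideoCapture(path)
    n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS) or 1  # guard against files reporting 0 fps
    print(f'{os.path.basename(path)}: {n_frames} frames, ~{n_frames / fps:.1f}s')
    cap.release()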

Object Detection with YOLOX on Saved Video Files

import os
import glob
from datetime import datetime
from pytz import timezone
import pytz

import torch

from yolox.data.datasets import COCO_CLASSES
from yolox.exp import get_exp

import numpy as np
import tqdm
import cv2

from dog import Predictor

model_name = 'yolox-x'
model_filename = 'yolox_x.pth'
conf = 0.25
nms = 0.45
tsize = 640
device = 'mps'  # Apple Silicon GPU; use 'cuda' or 'cpu' elsewhere

exp = get_exp(None, model_name)
exp.test_conf = conf
exp.nmsthre = nms
exp.test_size = (tsize, tsize)
model = exp.get_model()

ckpt = torch.load(model_filename, map_location="cpu")
model.load_state_dict(ckpt["model"])
model = model.to(device)
model.eval()

predictor = Predictor(model, exp, COCO_CLASSES, None, None, device, False, False)

date_format = '%m/%d/%Y %H:%M:%S'
date = datetime.now(tz=pytz.utc).astimezone(timezone('US/Pacific'))

save_path = os.path.join('results', '1.mp4')
os.makedirs('results', exist_ok=True)
# the writer's frame size must match the frames written below; the capture
# script above records at 1280x720, so adjust (640, 480) if your videos differ
vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*"mp4v"), 24, (640, 480))

filenames = glob.glob('videos/*.mp4')
for filename in tqdm.tqdm(filenames):
    #if filename != 'videos/05_21_2023_15_47_12.mp4':
    #    continue
    vidcap = cv2.VideoCapture(filename)
    video_length = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT)) - 1
    print(filename, video_length)
    # read the whole clip into memory (fine for short clips, heavy for long ones)
    frames = []
    success, image = vidcap.read()
    while success:
        frames.append(image)
        success, image = vidcap.read()
    vidcap.release()

    for image in tqdm.tqdm(frames):
        outputs, img_info = predictor.inference(image)
        result_frame = predictor.visual(outputs[0], img_info)
        if result_frame is not None:
            vid_writer.write(result_frame)

vid_writer.release()
print('DONE')
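If only certain classes matter (the Predictor here comes from a module named dog, so presumably dogs), the raw output can be filtered before drawing. This is a minimal sketch, assuming the standard YOLOX postprocess layout where each detection row is [x1, y1, x2, y2, obj_conf, class_conf, class_idx]; keep_classes is a hypothetical helper, not part of YOLOX.

def keep_classes(output, class_names=('dog',)):
    """Keep only detections whose class name is in class_names, or None."""
    if output is None:
        return None
    keep_ids = [i for i, name in enumerate(COCO_CLASSES) if name in class_names]
    # column 6 is the predicted class index in YOLOX's postprocessed output
    mask = torch.isin(output[:, 6].long(), torch.tensor(keep_ids, device=output.device))
    filtered = output[mask]
    return filtered if len(filtered) > 0 else None

# e.g. only write frames that actually contain one of the wanted classes:
#     filtered = keep_classes(outputs[0])
#     if filtered is not None:
#         vid_writer.write(predictor.visual(filtered, img_info))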

Llama Download

Tokenizer

curl -OL https://agi.gpt4.org/llama/LLaMA/tokenizer.model

7B

curl -OL https://agi.gpt4.org/llama/LLaMA/7B/consolidated.00.pth
curl -OL https://agi.gpt4.org/llama/LLaMA/7B/params.json

13B

curl -OL https://agi.gpt4.org/llama/LLaMA/13B/consolidated.00.pth
curl -OL https://agi.gpt4.org/llama/LLaMA/13B/consolidated.01.pth
curl -OL https://agi.gpt4.org/llama/LLaMA/13B/params.json

30B

curl -OL https://agi.gpt4.org/llama/LLaMA/30B/consolidated.00.pth
curl -OL https://agi.gpt4.org/llama/LLaMA/30B/consolidated.01.pth
curl -OL https://agi.gpt4.org/llama/LLaMA/30B/consolidated.02.pth
curl -OL https://agi.gpt4.org/llama/LLaMA/30B/consolidated.03.pth
curl -OL https://agi.gpt4.org/llama/LLaMA/30B/params.json

65B

curl -OL https://agi.gpt4.org/llama/LLaMA/65B/consolidated.00.pth
curl -OL https://agi.gpt4.org/llama/LLaMA/65B/consolidated.01.pth
curl -OL https://agi.gpt4.org/llama/LLaMA/65B/consolidated.02.pth
curl -OL https://agi.gpt4.org/llama/LLaMA/65B/consolidated.03.pth
curl -OL https://agi.gpt4.org/llama/LLaMA/65B/consolidated.04.pth
curl -OL https://agi.gpt4.org/llama/LLaMA/65B/consolidated.05.pth
curl -OL https://agi.gpt4.org/llama/LLaMA/65B/consolidated.06.pth
curl -OL https://agi.gpt4.org/llama/LLaMA/65B/consolidated.07.pth
curl -OL https://agi.gpt4.org/llama/LLaMA/65B/params.json
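A quick way to confirm the files downloaded intact is to load params.json and the SentencePiece tokenizer. A minimal sketch, assuming the curl commands above were run in the current directory:

import json

import sentencepiece as spm

with open('params.json') as f:
    params = json.load(f)
print(params)  # dim, n_layers, n_heads, etc. for the downloaded model size

sp = spm.SentencePieceProcessor(model_file='tokenizer.model')
print(sp.vocab_size())           # 32000 for the LLaMA tokenizer
print(sp.encode('hello world'))  # token ids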