Upload our codec

choiseungmi
Commit a9105f5b478d41083850e5ec638d0cbf9b3e76f6 a9105f5b 1 parent a32c00bd
Showing 3 changed files with 1339 additions and 0 deletions
Our Encoder/codec-Copy1.py
Our Encoder/codec-Copy2.py
Our Encoder/codec-Copy3.py
--- a/Our Encoder/codec-Copy1.py 0 → 100644
View file @a9105f5
+++ b/Our Encoder/codec-Copy1.py 0 → 100644
View file @a9105f5
+# Copyright 2020 InterDigital Communications, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import struct
+import sys
+import time
+import math
+
+from pathlib import Path
+
+import torch
+import torch.nn.functional as F
+
+from PIL import Image
+from torchvision.transforms import ToPILImage, ToTensor
+
+import compressai
+
+from compressai.zoo import models
+
+model_ids = {k: i for i, k in enumerate(models.keys())}
+
+metric_ids = {
+    "mse": 0,
+}
+
+
+def inverse_dict(d):
+    # We assume dict values are unique...
+    assert len(d.keys()) == len(set(d.keys()))
+    return {v: k for k, v in d.items()}
+
+
+def filesize(filepath: str) -> int:
+    if not Path(filepath).is_file():
+        raise ValueError(f'Invalid file "{filepath}".')
+    return Path(filepath).stat().st_size
+
+
+def load_image(filepath: str) -> Image.Image:
+    return Image.open(filepath).convert("RGB")
+
+
+def img2torch(img: Image.Image) -> torch.Tensor:
+    return ToTensor()(img).unsqueeze(0)
+
+
+def torch2img(x: torch.Tensor) -> Image.Image:
+    return ToPILImage()(x.clamp_(0, 1).squeeze())
+
+
+def write_uints(fd, values, fmt=">{:d}I"):
+    fd.write(struct.pack(fmt.format(len(values)), *values))
+
+
+def write_uchars(fd, values, fmt=">{:d}B"):
+    fd.write(struct.pack(fmt.format(len(values)), *values))
+
+
+def read_uints(fd, n, fmt=">{:d}I"):
+    sz = struct.calcsize("I")
+    return struct.unpack(fmt.format(n), fd.read(n * sz))
+
+
+def read_uchars(fd, n, fmt=">{:d}B"):
+    sz = struct.calcsize("B")
+    return struct.unpack(fmt.format(n), fd.read(n * sz))
+
+
+def write_bytes(fd, values, fmt=">{:d}s"):
+    if len(values) == 0:
+        return
+    fd.write(struct.pack(fmt.format(len(values)), values))
+
+
+def read_bytes(fd, n, fmt=">{:d}s"):
+    sz = struct.calcsize("s")
+    return struct.unpack(fmt.format(n), fd.read(n * sz))[0]
+
+
+def get_header(model_name, metric, quality):
+    """Format header information:
+    - 1 byte for model id
+    - 4 bits for metric
+    - 4 bits for quality param
+    """
+    metric = metric_ids[metric]
+    code = (metric << 4) | (quality - 1 & 0x0F)
+    return model_ids[model_name], code
+
+
+def parse_header(header):
+    """Read header information from 2 bytes:
+    - 1 byte for model id
+    - 4 bits for metric
+    - 4 bits for quality param
+    """
+    model_id, code = header
+    quality = (code & 0x0F) + 1
+    metric = code >> 4
+    return (
+        inverse_dict(model_ids)[model_id],
+        inverse_dict(metric_ids)[metric],
+        quality,
+    )
+
+
+def pad(x, p=2 ** 6):
+    h, w = x.size(2), x.size(3)
+    H = (h + p - 1) // p * p
+    W = (w + p - 1) // p * p
+    padding_left = (W - w) // 2
+    padding_right = W - w - padding_left
+    padding_top = (H - h) // 2
+    padding_bottom = H - h - padding_top
+    return F.pad(
+        x,
+        (padding_left, padding_right, padding_top, padding_bottom),
+        mode="constant",
+        value=0,
+    )
+
+
+def crop(x, size):
+    H, W = x.size(2), x.size(3)
+    h, w = size
+    padding_left = (W - w) // 2
+    padding_right = W - w - padding_left
+    padding_top = (H - h) // 2
+    padding_bottom = H - h - padding_top
+    return F.pad(
+        x,
+        (-padding_left, -padding_right, -padding_top, -padding_bottom),
+        mode="constant",
+        value=0,
+    )
+
+def compute_psnr(a, b):
+    mse = torch.mean((a - b)**2).item()
+    return -10 * math.log10(mse)
+
+def _encode(path, image, model, metric, quality, coder, i, ref,total_bpp, ff, output, log_path):
+    compressai.set_entropy_coder(coder)
+    enc_start = time.time()
+
+    img = load_image(image)
+    start = time.time()
+    net = models[model](quality=quality, metric=metric, pretrained=True).eval()
+    load_time = time.time() - start
+
+    x = img2torch(img)
+    h, w = x.size(2), x.size(3)
+    p = 64  # maximum 6 strides of 2
+    x = pad(x, p)
+    
+#    header = get_header(model, metric, quality)
+    
+    strings = []
+
+    with torch.no_grad():
+        out = net.compress(x)
+    shape = out["shape"]
+    with Path(output).open("ab") as f:
+        # write shape and number of encoded latents
+        write_uints(f, (shape[0], shape[1], len(out["strings"])))
+
+        for s in out["strings"]:
+            write_uints(f, (len(s[0]),))
+            write_bytes(f, s[0])
+            strings.append([s[0]])
+
+    with torch.no_grad():
+        recon_out = net.decompress(strings, (shape[0], shape[1], len(out["strings"])))
+    x_recon = crop(recon_out["x_hat"], (h, w))
+
+    psnr=compute_psnr(x, x_recon)
+    
+    if i==False:
+        diff=x-ref
+        diff1=torch.clamp(diff, min=-0.5, max=0.5)+0.5
+        diff_img = torch2img(diff1)
+        diff_img.save(path+"recon/diff_v1_"+str(ff)+"_q"+str(quality)+".png")
+        
+    enc_time = time.time() - enc_start
+    size = filesize(output)
+    bpp = float(size) * 8 / (img.size[0] * img.size[1]*3)
+    with Path(log_path).open("a") as f:
+        f.write( f"  {bpp-total_bpp:.4f}   | "
+                f"  {psnr:.4f}  |"
+               f"  Encoded in {enc_time:.2f}s (model loading: {load_time:.2f}s)\n")
+    recon_img = torch2img(x_recon)
+    recon_img.save(path+"recon/v1_recon"+str(ff)+"_q"+str(quality)+".png")
+        
+    return psnr, bpp, x_recon, enc_time
+
+
+def _decode(inputpath, coder, show, frame, output=None):
+    compressai.set_entropy_coder(coder)
+    dec_start = time.time()
+
+    with Path(inputpath).open("rb") as f:
+        model, metric, quality = parse_header(read_uchars(f, 2))
+        print(f"Model: {model:s}, metric: {metric:s}, quality: {quality:d}")
+
+        for i in range(frame):
+            original_size = read_uints(f, 2)
+            shape = read_uints(f, 2)
+            strings = []
+            n_strings = read_uints(f, 1)[0]
+            for _ in range(n_strings):
+                s = read_bytes(f, read_uints(f, 1)[0])
+                strings.append([s])
+
+            start = time.time()
+            net = models[model](quality=quality, metric=metric, pretrained=True).eval()
+            load_time = time.time() - start
+
+            with torch.no_grad():
+                out = net.decompress(strings, shape)
+
+            x_hat = crop(out["x_hat"], original_size)
+            img = torch2img(x_hat)
+            dec_time = time.time() - dec_start
+            print(f"Decoded in {dec_time:.2f}s (model loading: {load_time:.2f}s)")
+
+            if show:
+                show_image(img)
+            if output is not None:
+                img.save(output+"_frame"+str(i)+".png")
+
+def show_image(img: Image.Image):
+    from matplotlib import pyplot as plt
+
+    fig, ax = plt.subplots()
+    ax.axis("off")
+    ax.title.set_text("Decoded image")
+    ax.imshow(img)
+    fig.tight_layout()
+    plt.show()
+
+
+def encode(argv):
+    parser = argparse.ArgumentParser(description="Encode image to bit-stream")
+    parser.add_argument("image", type=str)
+    parser.add_argument(
+        "--model",
+        choices=models.keys(),
+        default=list(models.keys())[0],
+        help="NN model to use (default: %(default)s)",
+    )
+    parser.add_argument(
+        "-m",
+        "--metric",
+        choices=["mse"],
+        default="mse",
+        help="metric trained against (default: %(default)s",
+    )
+    parser.add_argument(
+        "-q",
+        "--quality",
+        choices=list(range(1, 9)),
+        type=int,
+        default=3,
+        help="Quality setting (default: %(default)s)",
+    )
+    parser.add_argument(
+        "-c",
+        "--coder",
+        choices=compressai.available_entropy_coders(),
+        default=compressai.available_entropy_coders()[0],
+        help="Entropy coder (default: %(default)s)",
+    )
+    parser.add_argument(
+        "-f",
+        "--frame",
+        type=int,
+        default=100,
+        help="Frame setting (default: %(default)s)",
+    )
+    parser.add_argument(
+        "-fr",
+        "--framerate",
+        choices=[60,50,24],
+        type=int,
+        default=50,
+        help="Frame rate setting (default: %(default)s)",
+    )
+    parser.add_argument(
+        "-width",
+        "--width",
+        type=int,
+        default=768,
+        help="width setting (default: %(default))",
+    )
+    parser.add_argument(
+        "-hight",
+        "--hight",
+        type=int,
+        default=768,
+        help="hight setting (default: %(default))",
+    )
+    parser.add_argument("-o", "--output", help="Output path")
+    args = parser.parse_args(argv)
+    path="examples/"+args.image+"/"
+    if not args.output:
+        #args.output = Path(Path(args.image).resolve().name).with_suffix(".bin")
+        args.output = path+args.image+"_"+args.model+"_q"+str(args.quality)+"_v1.bin"
+    log_path=path+args.image+"_"+args.model+"_q"+str(args.quality)+"_v1.txt"
+    
+    header = get_header(args.model, args.metric, args.quality)
+    with Path(args.output).open("wb") as f:
+        write_uchars(f, header)
+        write_uints(f, (args.hight, args.width))
+        
+    with Path(log_path).open("w") as f:
+        f.write(f"model : {args.model}  |  "
+                f"quality : {args.quality}  |  "
+                f"frames : {args.frame}\n")
+        f.write( f"frame  |     bpp     | "
+                f"    psnr     |"
+               f"  Encoded time (model loading)\n"
+               f"  {0:3d}    |  ")
+        
+    total_psnr=0.0
+    total_bpp=0.0
+    total_time=0.0
+    args.image =path + args.image+"_768x768_"+str(args.framerate)+"_8bit_444"
+    img=args.image+"_frame"+str(0)+".png"
+    total_psnr, total_bpp, ref,total_time = _encode(path, img, args.model, args.metric, args.quality, args.coder, True, 0, total_bpp, 0, args.output, log_path)
+    for ff in range(1, args.frame):
+        with Path(log_path).open("a") as f:
+            f.write(f"  {ff:3d}    |  ")
+        img=args.image+"_frame"+str(ff)+".png"
+        
+        psnr, total_bpp, ref,time = _encode(path, img, args.model, args.metric, args.quality, args.coder, False, ref, total_bpp, ff, args.output, log_path)
+        total_psnr+=psnr
+        total_time+=time
+        
+    total_psnr/=args.frame
+    total_bpp/=args.frame
+    
+    with Path(log_path).open("a") as f:
+        f.write( f"\n Total Encoded time: {total_time:.2f}s\n"
+                f"\n Total PSNR: {total_psnr:.6f}\n"
+                f" Total BPP: {total_bpp:.6f}\n")
+    print(total_psnr)
+    print(total_bpp)
+    
+
+def decode(argv):
+    parser = argparse.ArgumentParser(description="Decode bit-stream to imager")
+    parser.add_argument("input", type=str)
+    parser.add_argument(
+        "-c",
+        "--coder",
+        choices=compressai.available_entropy_coders(),
+        default=compressai.available_entropy_coders()[0],
+        help="Entropy coder (default: %(default)s)",
+    )
+    parser.add_argument(
+        "-f",
+        "--frame",
+        choices=list(range(1, 600)),
+        type=int,
+        default=100,
+        help="Frame setting (default: %(default)s)",
+    )
+    parser.add_argument("--show", action="store_true")
+    parser.add_argument("-o", "--output", help="Output path")
+    args = parser.parse_args(argv)
+    
+    args.input="examples/"+args.input+"/"+args.input+"_768x768_"+str(args.frame//2)+"_8bit_444.bin"
+    args.output="examples/recon/"+args.output+"/"+args.output+"_768x768_"+str(50)+"_8bit_444"
+    _decode(args.input, args.coder, args.show, args.frame, args.output)
+
+
+def parse_args(argv):
+    parser = argparse.ArgumentParser(description="")
+    parser.add_argument("command", choices=["encode", "decode"])
+    args = parser.parse_args(argv)
+    return args
+
+
+def main(argv):
+    args = parse_args(argv[1:2])
+    argv = argv[2:]
+    torch.set_num_threads(1)  # just to be sure
+    if args.command == "encode":
+        encode(argv)
+    elif args.command == "decode":
+        decode(argv)
+
+
+if __name__ == "__main__":
+    main(sys.argv)
--- a/Our Encoder/codec-Copy2.py 0 → 100644
View file @a9105f5
+++ b/Our Encoder/codec-Copy2.py 0 → 100644
View file @a9105f5
+# Copyright 2020 InterDigital Communications, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import struct
+import sys
+import time
+import math
+
+from pathlib import Path
+
+import torch
+import torch.nn.functional as F
+
+from PIL import Image
+from torchvision.transforms import ToPILImage, ToTensor
+
+import compressai
+
+from compressai.zoo import models
+
+model_ids = {k: i for i, k in enumerate(models.keys())}
+
+metric_ids = {
+    "mse": 0,
+}
+
+
+def inverse_dict(d):
+    # We assume dict values are unique...
+    assert len(d.keys()) == len(set(d.keys()))
+    return {v: k for k, v in d.items()}
+
+
+def filesize(filepath: str) -> int:
+    if not Path(filepath).is_file():
+        raise ValueError(f'Invalid file "{filepath}".')
+    return Path(filepath).stat().st_size
+
+
+def load_image(filepath: str) -> Image.Image:
+    return Image.open(filepath).convert("RGB")
+
+
+def img2torch(img: Image.Image) -> torch.Tensor:
+    return ToTensor()(img).unsqueeze(0)
+
+
+def torch2img(x: torch.Tensor) -> Image.Image:
+    return ToPILImage()(x.clamp_(0, 1).squeeze())
+
+
+def write_uints(fd, values, fmt=">{:d}I"):
+    fd.write(struct.pack(fmt.format(len(values)), *values))
+
+
+def write_uchars(fd, values, fmt=">{:d}B"):
+    fd.write(struct.pack(fmt.format(len(values)), *values))
+
+
+def read_uints(fd, n, fmt=">{:d}I"):
+    sz = struct.calcsize("I")
+    return struct.unpack(fmt.format(n), fd.read(n * sz))
+
+
+def read_uchars(fd, n, fmt=">{:d}B"):
+    sz = struct.calcsize("B")
+    return struct.unpack(fmt.format(n), fd.read(n * sz))
+
+
+def write_bytes(fd, values, fmt=">{:d}s"):
+    if len(values) == 0:
+        return
+    fd.write(struct.pack(fmt.format(len(values)), values))
+
+
+def read_bytes(fd, n, fmt=">{:d}s"):
+    sz = struct.calcsize("s")
+    return struct.unpack(fmt.format(n), fd.read(n * sz))[0]
+
+
+def get_header(model_name, metric, quality):
+    """Format header information:
+    - 1 byte for model id
+    - 4 bits for metric
+    - 4 bits for quality param
+    """
+    metric = metric_ids[metric]
+    code = (metric << 4) | (quality - 1 & 0x0F)
+    return model_ids[model_name], code
+
+
+def parse_header(header):
+    """Read header information from 2 bytes:
+    - 1 byte for model id
+    - 4 bits for metric
+    - 4 bits for quality param
+    """
+    model_id, code = header
+    quality = (code & 0x0F) + 1
+    metric = code >> 4
+    return (
+        inverse_dict(model_ids)[model_id],
+        inverse_dict(metric_ids)[metric],
+        quality,
+    )
+
+
+def pad(x, p=2 ** 6):
+    h, w = x.size(2), x.size(3)
+    H = (h + p - 1) // p * p
+    W = (w + p - 1) // p * p
+    padding_left = (W - w) // 2
+    padding_right = W - w - padding_left
+    padding_top = (H - h) // 2
+    padding_bottom = H - h - padding_top
+    return F.pad(
+        x,
+        (padding_left, padding_right, padding_top, padding_bottom),
+        mode="constant",
+        value=0,
+    )
+
+
+def crop(x, size):
+    H, W = x.size(2), x.size(3)
+    h, w = size
+    padding_left = (W - w) // 2
+    padding_right = W - w - padding_left
+    padding_top = (H - h) // 2
+    padding_bottom = H - h - padding_top
+    return F.pad(
+        x,
+        (-padding_left, -padding_right, -padding_top, -padding_bottom),
+        mode="constant",
+        value=0,
+    )
+
+def compute_psnr(a, b):
+    mse = torch.mean((a - b)**2).item()
+    return -10 * math.log10(mse)
+
+def _encode(path, image, model, metric, quality, coder, i, ref, total_bpp, ff, output, log_path):
+    compressai.set_entropy_coder(coder)
+    enc_start = time.time()
+
+    img = load_image(image)
+    start = time.time()
+    net = models[model](quality=quality, metric=metric, pretrained=True).eval()
+    load_time = time.time() - start
+
+    x = img2torch(img)
+    h, w = x.size(2), x.size(3)
+    p = 64  # maximum 6 strides of 2
+    x = pad(x, p)
+    
+#    header = get_header(model, metric, quality)
+    if i==True:
+        strings = []
+
+        with torch.no_grad():
+            out = net.compress(x)
+        shape = out["shape"]
+        with Path(output).open("ab") as f:
+            # write shape and number of encoded latents
+            write_uints(f, (shape[0], shape[1], len(out["strings"])))
+
+            for s in out["strings"]:
+                write_uints(f, (len(s[0]),))
+                write_bytes(f, s[0])
+                strings.append([s[0]])
+
+        with torch.no_grad():
+            recon_out = net.decompress(strings, (shape[0], shape[1], len(out["strings"])))
+        x_recon = crop(recon_out["x_hat"], (h, w))
+        
+        psnr=compute_psnr(x, x_recon)
+    else:
+        diff=x-ref
+        #1
+        diff1=torch.clamp(diff, min=-0.5, max=0.5)+0.5
+        
+        #2
+        '''
+        diff1=torch.clamp(diff, min=0.0, max=1.0)
+        diff2=-torch.clamp(diff, min=-1.0, max=0.0)
+        
+        diff1=pad(diff1, p)
+        diff2=pad(diff2, p)
+       '''
+        #1
+        
+        with torch.no_grad():
+            out1 = net.compress(diff1)
+        shape1 = out1["shape"]
+        strings = []
+
+        with Path(output).open("ab") as f:
+            # write shape and number of encoded latents
+            write_uints(f, (shape1[0], shape1[1], len(out1["strings"])))
+
+            for s in out1["strings"]:
+                write_uints(f, (len(s[0]),))
+                write_bytes(f, s[0])
+                strings.append([s[0]])
+
+        with torch.no_grad():
+            recon_out = net.decompress(strings, (shape1[0], shape1[1], len(out1["strings"])))
+        x_hat1 = crop(recon_out["x_hat"], (h, w))
+        
+        #2
+        '''
+        with torch.no_grad():
+            out1 = net.compress(diff1)
+        shape1 = out1["shape"]
+        strings = []
+
+        with Path(output).open("ab") as f:
+            # write shape and number of encoded latents
+            write_uints(f, (shape1[0], shape1[1], len(out1["strings"])))
+
+            for s in out1["strings"]:
+                write_uints(f, (len(s[0]),))
+                write_bytes(f, s[0])
+                strings.append([s[0]])
+
+        with torch.no_grad():
+            recon_out = net.decompress(strings, (shape1[0], shape1[1], len(out1["strings"])))
+        x_hat1 = crop(recon_out["x_hat"], (h, w))
+        with torch.no_grad():
+            out = net.compress(diff2)
+        shape = out["shape"]
+        strings = []
+
+        with Path(output).open("ab") as f:
+            # write shape and number of encoded latents
+            write_uints(f, (shape[0], shape[1], len(out["strings"])))
+
+            for s in out["strings"]:
+                write_uints(f, (len(s[0]),))
+                write_bytes(f, s[0])
+                strings.append([s[0]])
+
+        with torch.no_grad():
+            recon_out = net.decompress(strings, (shape[0], shape[1], len(out["strings"])))
+        x_hat2 = crop(recon_out["x_hat"], (h, w))
+        x_recon=ref+x_hat1-x_hat2
+        '''
+        
+        x_recon=ref+x_hat1-0.5
+        psnr=compute_psnr(x, x_recon)
+        diff_img = torch2img(diff1)
+        diff_img.save(path+"recon/diff"+str(ff)+"_q"+str(quality)+".png")
+        
+    enc_time = time.time() - enc_start
+    size = filesize(output)
+    bpp = float(size) * 8 / (img.size[0] * img.size[1]*3)
+    with Path(log_path).open("a") as f:
+        f.write( f"  {bpp-total_bpp:.4f}   | "
+                f"  {psnr:.4f}  |"
+               f"  Encoded in {enc_time:.2f}s (model loading: {load_time:.2f}s)\n")
+    recon_img = torch2img(x_recon)
+    recon_img.save(path+"recon/recon"+str(ff)+"_q"+str(quality)+".png")
+        
+    return psnr, bpp, x_recon, enc_time
+
+
+def _decode(inputpath, coder, show, frame, output=None):
+    compressai.set_entropy_coder(coder)
+    dec_start = time.time()
+
+    with Path(inputpath).open("rb") as f:
+        model, metric, quality = parse_header(read_uchars(f, 2))
+        print(f"Model: {model:s}, metric: {metric:s}, quality: {quality:d}")
+
+        for i in range(frame):
+            original_size = read_uints(f, 2)
+            shape = read_uints(f, 2)
+            strings = []
+            n_strings = read_uints(f, 1)[0]
+            for _ in range(n_strings):
+                s = read_bytes(f, read_uints(f, 1)[0])
+                strings.append([s])
+
+            start = time.time()
+            net = models[model](quality=quality, metric=metric, pretrained=True).eval()
+            load_time = time.time() - start
+
+            with torch.no_grad():
+                out = net.decompress(strings, shape)
+
+            x_hat = crop(out["x_hat"], original_size)
+            img = torch2img(x_hat)
+            dec_time = time.time() - dec_start
+            print(f"Decoded in {dec_time:.2f}s (model loading: {load_time:.2f}s)")
+
+            if show:
+                show_image(img)
+            if output is not None:
+                img.save(output+"_frame"+str(i)+".png")
+
+def show_image(img: Image.Image):
+    from matplotlib import pyplot as plt
+
+    fig, ax = plt.subplots()
+    ax.axis("off")
+    ax.title.set_text("Decoded image")
+    ax.imshow(img)
+    fig.tight_layout()
+    plt.show()
+
+
+def encode(argv):
+    parser = argparse.ArgumentParser(description="Encode image to bit-stream")
+    parser.add_argument("image", type=str)
+    parser.add_argument(
+        "--model",
+        choices=models.keys(),
+        default=list(models.keys())[0],
+        help="NN model to use (default: %(default)s)",
+    )
+    parser.add_argument(
+        "-m",
+        "--metric",
+        choices=["mse"],
+        default="mse",
+        help="metric trained against (default: %(default)s",
+    )
+    parser.add_argument(
+        "-q",
+        "--quality",
+        choices=list(range(1, 9)),
+        type=int,
+        default=3,
+        help="Quality setting (default: %(default)s)",
+    )
+    parser.add_argument(
+        "-c",
+        "--coder",
+        choices=compressai.available_entropy_coders(),
+        default=compressai.available_entropy_coders()[0],
+        help="Entropy coder (default: %(default)s)",
+    )
+    parser.add_argument(
+        "-f",
+        "--frame",
+        type=int,
+        default=100,
+        help="Frame setting (default: %(default)s)",
+    )
+    parser.add_argument(
+        "-fr",
+        "--framerate",
+        choices=[60,50,24],
+        type=int,
+        default=50,
+        help="Frame rate setting (default: %(default)s)",
+    )
+    parser.add_argument(
+        "-width",
+        "--width",
+        type=int,
+        default=768,
+        help="width setting (default: %(default))",
+    )
+    parser.add_argument(
+        "-height",
+        "--height",
+        type=int,
+        default=768,
+        help="hight setting (default: %(default))",
+    )
+    parser.add_argument("-o", "--output", help="Output path")
+    args = parser.parse_args(argv)
+    path="examples/"+args.image+"/"
+    if not args.output:
+        #args.output = Path(Path(args.image).resolve().name).with_suffix(".bin")
+        args.output = path+args.image+"_"+args.model+"_q"+str(args.quality)+"_v2.bin"
+    log_path=path+args.image+"_"+args.model+"_q"+str(args.quality)+"_v2.txt"
+    
+    header = get_header(args.model, args.metric, args.quality)
+    with Path(args.output).open("wb") as f:
+        write_uchars(f, header)
+        write_uints(f, (args.height, args.width))
+        
+    with Path(log_path).open("w") as f:
+        f.write(f"model : {args.model}  |  "
+                f"quality : {args.quality}  |  "
+                f"frames : {args.frame}\n")
+        f.write( f"frame  |     bpp     | "
+                f"    psnr     |"
+               f"  Encoded time (model loading)\n"
+               f"  {0:3d}    |  ")
+        
+    total_psnr=0.0
+    total_bpp=0.0
+    total_time=0.0
+    args.image =path + args.image+"_768x768_"+str(args.framerate)+"_8bit_444"
+    img=args.image+"_frame"+str(0)+".png"
+    total_psnr, total_bpp, ref, total_time = _encode(path, img, args.model, args.metric, args.quality, args.coder, True, 0, total_bpp, 0, args.output, log_path)
+    for ff in range(1, args.frame):
+        with Path(log_path).open("a") as f:
+            f.write(f"  {ff:3d}    |  ")
+        img=args.image+"_frame"+str(ff)+".png"
+        
+        psnr, total_bpp, ref, time = _encode(path, img, args.model, args.metric, args.quality, args.coder, False, ref, total_bpp, ff, args.output, log_path)
+        total_psnr+=psnr
+        total_time+=time
+        
+    total_psnr/=args.frame
+    total_bpp/=args.frame
+    
+    with Path(log_path).open("a") as f:
+        f.write( f"\n Total Encoded time: {total_time:.2f}s\n"
+                f"\n Total PSNR: {total_psnr:.6f}\n"
+                f" Total BPP: {total_bpp:.6f}\n")
+    print(total_psnr)
+    print(total_bpp)
+    
+
+def decode(argv):
+    parser = argparse.ArgumentParser(description="Decode bit-stream to imager")
+    parser.add_argument("input", type=str)
+    parser.add_argument(
+        "-c",
+        "--coder",
+        choices=compressai.available_entropy_coders(),
+        default=compressai.available_entropy_coders()[0],
+        help="Entropy coder (default: %(default)s)",
+    )
+    parser.add_argument(
+        "-f",
+        "--frame",
+        choices=list(range(1, 600)),
+        type=int,
+        default=100,
+        help="Frame setting (default: %(default)s)",
+    )
+    parser.add_argument("--show", action="store_true")
+    parser.add_argument("-o", "--output", help="Output path")
+    args = parser.parse_args(argv)
+    
+    args.input="examples/"+args.input+"/"+args.input+"_768x768_"+str(args.frame//2)+"_8bit_444.bin"
+    args.output="examples/recon/"+args.output+"/"+args.output+"_768x768_"+str(50)+"_8bit_444"
+    _decode(args.input, args.coder, args.show, args.frame, args.output)
+
+
+def parse_args(argv):
+    parser = argparse.ArgumentParser(description="")
+    parser.add_argument("command", choices=["encode", "decode"])
+    args = parser.parse_args(argv)
+    return args
+
+
+def main(argv):
+    args = parse_args(argv[1:2])
+    argv = argv[2:]
+    torch.set_num_threads(1)  # just to be sure
+    if args.command == "encode":
+        encode(argv)
+    elif args.command == "decode":
+        decode(argv)
+
+
+if __name__ == "__main__":
+    main(sys.argv)
--- a/Our Encoder/codec-Copy3.py 0 → 100644
View file @a9105f5
+++ b/Our Encoder/codec-Copy3.py 0 → 100644
View file @a9105f5
+# Copyright 2020 InterDigital Communications, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import struct
+import sys
+import time
+import math
+
+from pathlib import Path
+
+import torch
+import torch.nn.functional as F
+
+from PIL import Image
+from torchvision.transforms import ToPILImage, ToTensor
+
+import compressai
+
+from compressai.zoo import models
+
+model_ids = {k: i for i, k in enumerate(models.keys())}
+
+metric_ids = {
+    "mse": 0,
+}
+
+
+def inverse_dict(d):
+    # We assume dict values are unique...
+    assert len(d.keys()) == len(set(d.keys()))
+    return {v: k for k, v in d.items()}
+
+
+def filesize(filepath: str) -> int:
+    if not Path(filepath).is_file():
+        raise ValueError(f'Invalid file "{filepath}".')
+    return Path(filepath).stat().st_size
+
+
+def load_image(filepath: str) -> Image.Image:
+    return Image.open(filepath).convert("RGB")
+
+
+def img2torch(img: Image.Image) -> torch.Tensor:
+    return ToTensor()(img).unsqueeze(0)
+
+
+def torch2img(x: torch.Tensor) -> Image.Image:
+    return ToPILImage()(x.clamp_(0, 1).squeeze())
+
+
+def write_uints(fd, values, fmt=">{:d}I"):
+    fd.write(struct.pack(fmt.format(len(values)), *values))
+
+
+def write_uchars(fd, values, fmt=">{:d}B"):
+    fd.write(struct.pack(fmt.format(len(values)), *values))
+
+
+def read_uints(fd, n, fmt=">{:d}I"):
+    sz = struct.calcsize("I")
+    return struct.unpack(fmt.format(n), fd.read(n * sz))
+
+
+def read_uchars(fd, n, fmt=">{:d}B"):
+    sz = struct.calcsize("B")
+    return struct.unpack(fmt.format(n), fd.read(n * sz))
+
+
+def write_bytes(fd, values, fmt=">{:d}s"):
+    if len(values) == 0:
+        return
+    fd.write(struct.pack(fmt.format(len(values)), values))
+
+
+def read_bytes(fd, n, fmt=">{:d}s"):
+    sz = struct.calcsize("s")
+    return struct.unpack(fmt.format(n), fd.read(n * sz))[0]
+
+
+def get_header(model_name, metric, quality):
+    """Format header information:
+    - 1 byte for model id
+    - 4 bits for metric
+    - 4 bits for quality param
+    """
+    metric = metric_ids[metric]
+    code = (metric << 4) | (quality - 1 & 0x0F)
+    return model_ids[model_name], code
+
+
+def parse_header(header):
+    """Read header information from 2 bytes:
+    - 1 byte for model id
+    - 4 bits for metric
+    - 4 bits for quality param
+    """
+    model_id, code = header
+    quality = (code & 0x0F) + 1
+    metric = code >> 4
+    return (
+        inverse_dict(model_ids)[model_id],
+        inverse_dict(metric_ids)[metric],
+        quality,
+    )
+
+
+def pad(x, p=2 ** 6):
+    h, w = x.size(2), x.size(3)
+    H = (h + p - 1) // p * p
+    W = (w + p - 1) // p * p
+    padding_left = (W - w) // 2
+    padding_right = W - w - padding_left
+    padding_top = (H - h) // 2
+    padding_bottom = H - h - padding_top
+    return F.pad(
+        x,
+        (padding_left, padding_right, padding_top, padding_bottom),
+        mode="constant",
+        value=0,
+    )
+
+
+def crop(x, size):
+    H, W = x.size(2), x.size(3)
+    h, w = size
+    padding_left = (W - w) // 2
+    padding_right = W - w - padding_left
+    padding_top = (H - h) // 2
+    padding_bottom = H - h - padding_top
+    return F.pad(
+        x,
+        (-padding_left, -padding_right, -padding_top, -padding_bottom),
+        mode="constant",
+        value=0,
+    )
+
+def compute_psnr(a, b):
+    mse = torch.mean((a - b)**2).item()
+    return -10 * math.log10(mse)
+
+def _encode(path, image, model, metric, quality, coder, i, ref, total_bpp, ff, output, log_path):
+    compressai.set_entropy_coder(coder)
+    enc_start = time.time()
+
+    img = load_image(image)
+    start = time.time()
+    net = models[model](quality=quality, metric=metric, pretrained=True).eval()
+    load_time = time.time() - start
+
+    x = img2torch(img)
+    h, w = x.size(2), x.size(3)
+    p = 64  # maximum 6 strides of 2
+    x = pad(x, p)
+    
+#    header = get_header(model, metric, quality)
+    if i==True:
+        strings = []
+
+        with torch.no_grad():
+            out = net.compress(x)
+        shape = out["shape"]
+        with Path(output).open("ab") as f:
+            # write shape and number of encoded latents
+            write_uints(f, (shape[0], shape[1], len(out["strings"])))
+
+            for s in out["strings"]:
+                write_uints(f, (len(s[0]),))
+                write_bytes(f, s[0])
+                strings.append([s[0]])
+
+        with torch.no_grad():
+            recon_out = net.decompress(strings, (shape[0], shape[1], len(out["strings"])))
+        x_recon = crop(recon_out["x_hat"], (h, w))
+        
+        psnr=compute_psnr(x, x_recon)
+    else:
+        diff=x-ref
+        #2
+        
+        diff1=torch.clamp(diff, min=0.0, max=1.0)
+        diff2=-torch.clamp(diff, min=-1.0, max=0.0)
+        
+        diff1=pad(diff1, p)
+        diff2=pad(diff2, p)
+       
+        #2
+        
+        with torch.no_grad():
+            out1 = net.compress(diff1)
+        shape1 = out1["shape"]
+        strings = []
+
+        with Path(output).open("ab") as f:
+            # write shape and number of encoded latents
+            write_uints(f, (shape1[0], shape1[1], len(out1["strings"])))
+
+            for s in out1["strings"]:
+                write_uints(f, (len(s[0]),))
+                write_bytes(f, s[0])
+                strings.append([s[0]])
+
+        with torch.no_grad():
+            recon_out1 = net.decompress(strings, (shape1[0], shape1[1], len(out1["strings"])))
+        x_hat1 = crop(recon_out1["x_hat"], (h, w))
+        
+        with torch.no_grad():
+            out = net.compress(diff2)
+        shape = out["shape"]
+        strings = []
+
+        with Path(output).open("ab") as f:
+            # write shape and number of encoded latents
+            write_uints(f, (shape[0], shape[1], len(out["strings"])))
+
+            for s in out["strings"]:
+                write_uints(f, (len(s[0]),))
+                write_bytes(f, s[0])
+                strings.append([s[0]])
+
+        with torch.no_grad():
+            recon_out = net.decompress(strings, (shape[0], shape[1], len(out["strings"])))
+        x_hat2 = crop(recon_out["x_hat"], (h, w))
+        x_recon=ref+x_hat1-x_hat2
+       
+        psnr=compute_psnr(x, x_recon)
+        diff_img = torch2img(diff1)
+        diff_img.save(path+"recon/v3_diff_1_"+str(ff)+"_q"+str(quality)+".png")
+        diff_img = torch2img(diff2)
+        diff_img.save(path+"recon/v3_diff_2_"+str(ff)+"_q"+str(quality)+".png")
+        
+    enc_time = time.time() - enc_start
+    size = filesize(output)
+    bpp = float(size) * 8 / (img.size[0] * img.size[1]*3)
+    with Path(log_path).open("a") as f:
+        f.write( f"  {bpp-total_bpp:.4f}   | "
+                f"  {psnr:.4f}  |"
+               f"  Encoded in {enc_time:.2f}s (model loading: {load_time:.2f}s)\n")
+    recon_img = torch2img(x_recon)
+    recon_img.save(path+"recon/v3_recon"+str(ff)+"_q"+str(quality)+".png")
+        
+    return psnr, bpp, x_recon, enc_time
+
+
+def _decode(inputpath, coder, show, frame, output=None):
+    compressai.set_entropy_coder(coder)
+    dec_start = time.time()
+
+    with Path(inputpath).open("rb") as f:
+        model, metric, quality = parse_header(read_uchars(f, 2))
+        print(f"Model: {model:s}, metric: {metric:s}, quality: {quality:d}")
+
+        for i in range(frame):
+            original_size = read_uints(f, 2)
+            shape = read_uints(f, 2)
+            strings = []
+            n_strings = read_uints(f, 1)[0]
+            for _ in range(n_strings):
+                s = read_bytes(f, read_uints(f, 1)[0])
+                strings.append([s])
+
+            start = time.time()
+            net = models[model](quality=quality, metric=metric, pretrained=True).eval()
+            load_time = time.time() - start
+
+            with torch.no_grad():
+                out = net.decompress(strings, shape)
+
+            x_hat = crop(out["x_hat"], original_size)
+            img = torch2img(x_hat)
+            dec_time = time.time() - dec_start
+            print(f"Decoded in {dec_time:.2f}s (model loading: {load_time:.2f}s)")
+
+            if show:
+                show_image(img)
+            if output is not None:
+                img.save(output+"_frame"+str(i)+".png")
+
+def show_image(img: Image.Image):
+    from matplotlib import pyplot as plt
+
+    fig, ax = plt.subplots()
+    ax.axis("off")
+    ax.title.set_text("Decoded image")
+    ax.imshow(img)
+    fig.tight_layout()
+    plt.show()
+
+
+def encode(argv):
+    parser = argparse.ArgumentParser(description="Encode image to bit-stream")
+    parser.add_argument("image", type=str)
+    parser.add_argument(
+        "--model",
+        choices=models.keys(),
+        default=list(models.keys())[0],
+        help="NN model to use (default: %(default)s)",
+    )
+    parser.add_argument(
+        "-m",
+        "--metric",
+        choices=["mse"],
+        default="mse",
+        help="metric trained against (default: %(default)s",
+    )
+    parser.add_argument(
+        "-q",
+        "--quality",
+        choices=list(range(1, 9)),
+        type=int,
+        default=3,
+        help="Quality setting (default: %(default)s)",
+    )
+    parser.add_argument(
+        "-c",
+        "--coder",
+        choices=compressai.available_entropy_coders(),
+        default=compressai.available_entropy_coders()[0],
+        help="Entropy coder (default: %(default)s)",
+    )
+    parser.add_argument(
+        "-f",
+        "--frame",
+        type=int,
+        default=100,
+        help="Frame setting (default: %(default)s)",
+    )
+    parser.add_argument(
+        "-fr",
+        "--framerate",
+        choices=[60,50,24],
+        type=int,
+        default=50,
+        help="Frame rate setting (default: %(default)s)",
+    )
+    parser.add_argument(
+        "-width",
+        "--width",
+        type=int,
+        default=768,
+        help="width setting (default: %(default))",
+    )
+    parser.add_argument(
+        "-hight",
+        "--hight",
+        type=int,
+        default=768,
+        help="hight setting (default: %(default))",
+    )
+    parser.add_argument("-o", "--output", help="Output path")
+    args = parser.parse_args(argv)
+    path="examples/"+args.image+"/"
+    if not args.output:
+        #args.output = Path(Path(args.image).resolve().name).with_suffix(".bin")
+        args.output = path+args.image+"_"+args.model+"_q"+str(args.quality)+"_v3.bin"
+    log_path=path+args.image+"_"+args.model+"_q"+str(args.quality)+"_v3.txt"
+    
+    header = get_header(args.model, args.metric, args.quality)
+    with Path(args.output).open("wb") as f:
+        write_uchars(f, header)
+        write_uints(f, (args.hight, args.width))
+        
+    with Path(log_path).open("w") as f:
+        f.write(f"model : {args.model}  |  "
+                f"quality : {args.quality}  |  "
+                f"frames : {args.frame}\n")
+        f.write( f"frame  |     bpp     | "
+                f"    psnr     |"
+               f"  Encoded time (model loading)\n"
+               f"  {0:3d}    |  ")
+        
+    total_psnr=0.0
+    total_bpp=0.0
+    total_time=0.0
+    args.image =path + args.image+"_768x768_"+str(args.framerate)+"_8bit_444"
+    img=args.image+"_frame"+str(0)+".png"
+    total_psnr, total_bpp, ref, total_time = _encode(path, img, args.model, args.metric, args.quality, args.coder, True, 0, total_bpp, 0, args.output, log_path)
+    for ff in range(1, args.frame):
+        with Path(log_path).open("a") as f:
+            f.write(f"  {ff:3d}    |  ")
+        img=args.image+"_frame"+str(ff)+".png"
+        
+        psnr, total_bpp, ref, time = _encode(path, img, args.model, args.metric, args.quality, args.coder, False, ref, total_bpp, ff, args.output, log_path)
+        total_psnr+=psnr
+        total_time+=time
+        
+    total_psnr/=args.frame
+    total_bpp/=args.frame
+    
+    with Path(log_path).open("a") as f:
+        f.write( f"\n Total Encoded time: {total_time:.2f}s\n"
+                f"\n Total PSNR: {total_psnr:.6f}\n"
+                f" Total BPP: {total_bpp:.6f}\n")
+    print(total_psnr)
+    print(total_bpp)
+    
+
+def decode(argv):
+    parser = argparse.ArgumentParser(description="Decode bit-stream to imager")
+    parser.add_argument("input", type=str)
+    parser.add_argument(
+        "-c",
+        "--coder",
+        choices=compressai.available_entropy_coders(),
+        default=compressai.available_entropy_coders()[0],
+        help="Entropy coder (default: %(default)s)",
+    )
+    parser.add_argument(
+        "-f",
+        "--frame",
+        choices=list(range(1, 600)),
+        type=int,
+        default=100,
+        help="Frame setting (default: %(default)s)",
+    )
+    parser.add_argument("--show", action="store_true")
+    parser.add_argument("-o", "--output", help="Output path")
+    args = parser.parse_args(argv)
+    
+    args.input="examples/"+args.input+"/"+args.input+"_768x768_"+str(args.frame//2)+"_8bit_444_v3.bin"
+    args.output="examples/recon/"+args.output+"/"+args.output+"_768x768_"+str(50)+"_8bit_444"
+    _decode(args.input, args.coder, args.show, args.frame, args.output)
+
+
+def parse_args(argv):
+    parser = argparse.ArgumentParser(description="")
+    parser.add_argument("command", choices=["encode", "decode"])
+    args = parser.parse_args(argv)
+    return args
+
+
+def main(argv):
+    args = parse_args(argv[1:2])
+    argv = argv[2:]
+    torch.set_num_threads(1)  # just to be sure
+    if args.command == "encode":
+        encode(argv)
+    elif args.command == "decode":
+        decode(argv)
+
+
+if __name__ == "__main__":
+    main(sys.argv)