#!/usr/bin/env python3
# PyTkBrowser: A minimal GUI web browser using only Python's standard library.
# Features:
# - Tkinter GUI (address bar, back/forward/reload, status bar)
# - HTTP/HTTPS GET via urllib
# - Basic HTML-to-text rendering (skips script/style)
# - Clickable links in the rendered page
# - Back/forward history, redirects handling
#
# Limitations:
# - No JavaScript/CSS layout
# - Images, forms, cookies are minimal/not supported
# - Rendering is plain text with numbered links
import tkinter as tk
from tkinter import ttk, messagebox
import urllib.request
import urllib.parse
import urllib.error
from html.parser import HTMLParser
import html
import re
import sys
from collections import deque
USER_AGENT = "PyTkBrowser/0.1 (stdlib-only)"
TIMEOUT = 20
class SimpleRenderer(HTMLParser):
    """Very basic HTML to text renderer with link extraction.

    Feed HTML with ``feed()`` (and ``close()`` to flush buffered input), then
    read the results:

    * ``get_text()`` -- the page body as plain text; each link is followed by
      a ``[n]`` marker where ``n`` is its 1-based position in ``links``.
    * ``links``      -- list of ``(label, absolute_url)`` tuples in document
      order.
    * ``title``      -- contents of ``<title>``, or ``None`` if absent/blank.

    Content of ``<script>`` and ``<style>`` elements is skipped.
    """

    # Tags that force a line break both when opened and when closed.
    _BREAKING_TAGS = frozenset(
        ("p", "div", "section", "article", "h1", "h2", "h3", "h4", "h5", "h6")
    )
    _LIST_TAGS = frozenset(("ul", "ol"))

    def __init__(self, base_url=None):
        """Create a renderer; relative hrefs are resolved against *base_url*."""
        super().__init__()
        self.base_url = base_url
        self.in_script = False
        self.in_style = False
        self.text_chunks = []
        self.links = []  # list of (label, url)
        self.current_link = None  # {"url": ..., "text": ...} while inside <a href>
        self.list_level = 0
        self.title = None
        self.in_title = False
        self._title_parts = []  # raw <title> text pieces, normalized into .title

    def handle_starttag(self, tag, attrs):
        """Track parser state and emit line breaks / bullets for opening tags."""
        tag = tag.lower()
        if tag == "script":
            self.in_script = True
        elif tag == "style":
            self.in_style = True
        elif tag in self._BREAKING_TAGS or tag == "br":
            self.text_chunks.append("\n")
        elif tag in self._LIST_TAGS:
            self.list_level += 1
        elif tag == "li":
            # Start every item on its own line; without the newline
            # consecutive items run together as "• a• b".
            indent = " " * max(0, self.list_level - 1)
            self.text_chunks.append("\n" + indent + "• ")
        elif tag == "a":
            # Only the first href attribute counts; a missing/empty value
            # means the anchor is not a navigable link.
            href = next((v for k, v in attrs if k.lower() == "href"), None)
            if href:
                abs_url = urllib.parse.urljoin(self.base_url or "", href)
                self.current_link = {"url": abs_url, "text": ""}
        elif tag == "title":
            self.in_title = True

    def handle_endtag(self, tag):
        """Close parser state, emit trailing breaks, and finalize links."""
        tag = tag.lower()
        if tag == "script":
            self.in_script = False
        elif tag == "style":
            self.in_style = False
        elif tag in self._BREAKING_TAGS:
            self.text_chunks.append("\n")
        elif tag in self._LIST_TAGS:
            self.list_level = max(0, self.list_level - 1)
        elif tag == "a":
            if self.current_link:
                url = self.current_link["url"]
                # Fall back to the URL when the anchor had no visible text.
                label = self.current_link["text"].strip() or url
                self.links.append((label, url))
                # The marker number is the 1-based index into self.links.
                self.text_chunks.append(f" [{len(self.links)}]")
            self.current_link = None
        elif tag == "title":
            self.in_title = False

    def handle_data(self, data):
        """Collect text, collapsing every whitespace run to a single space.

        Text inside ``<title>`` goes to ``self.title`` only (not the body).
        Whitespace-only text is kept as one separating space so that
        ``<b>a</b> <i>b</i>`` renders as ``a b`` rather than ``ab``.
        """
        if self.in_script or self.in_style:
            return
        cleaned = re.sub(r"\s+", " ", data)
        if not cleaned:
            return
        if self.in_title:
            self._title_parts.append(cleaned)
            self.title = " ".join("".join(self._title_parts).split()) or None
            return
        if self.current_link is not None:
            self.current_link["text"] += cleaned
        if cleaned == " ":
            # A lone separator is pointless at the very start or directly
            # after existing whitespace / a line break.
            if not self.text_chunks or self.text_chunks[-1][-1:].isspace():
                return
        self.text_chunks.append(cleaned)

    def handle_entityref(self, name):
        """Render a named entity such as ``&amp;`` as its character.

        Only invoked when the parser runs with ``convert_charrefs=False``;
        with the default used by ``__init__`` the parser unescapes references
        itself before calling ``handle_data``.
        """
        self.handle_data(html.unescape(f"&{name};"))

    def handle_charref(self, name):
        """Render a numeric reference (``name`` is e.g. ``"65"`` or ``"x41"``).

        Accepts both ``x`` and ``X`` hex prefixes; malformed or out-of-range
        values are rendered as nothing.
        """
        try:
            if name[:1] in ("x", "X"):
                code = int(name[1:], 16)
            else:
                code = int(name)
            ch = chr(code)
        except (ValueError, OverflowError):
            ch = ""
        self.handle_data(ch)

    def get_text(self):
        """Return the collected body text with tidy line breaks."""
        content = "".join(self.text_chunks)
        # Drop spaces left dangling in front of a line break.
        content = re.sub(r"[ \t]+\n", "\n", content)
        # Never show more than one blank line in a row.
        content = re.sub(r"\n\s*\n\s*\n+", "\n\n", content)
        return content.strip()
class BrowserModel:
    """Networking and history management.

    ``history_back`` holds previously visited URLs (most recent last) and
    ``history_forward`` holds URLs that were left via ``back()`` (next one
    first). ``current_url``, ``current_text``, ``current_links`` and ``title``
    describe the page that was rendered last.
    """

    def __init__(self):
        self.history_back = deque()
        self.history_forward = deque()
        self.current_url = None
        self.current_text = ""
        self.current_links = []
        self.title = ""

    def normalize_url(self, url: str) -> str:
        """Return *url* stripped of surrounding whitespace, with ``http://``
        prepended when it does not already start with ``scheme://``."""
        url = url.strip()
        if not re.match(r"^[a-zA-Z][a-zA-Z0-9+.-]*://", url):
            return "http://" + url
        return url

    def fetch(self, url: str):
        """Download *url* and return ``(final_url, decoded_text)``.

        ``final_url`` is what ``resp.geturl()`` reports after the request.
        Exceptions raised by ``urllib.request.urlopen`` propagate to the
        caller.
        """
        url = self.normalize_url(url)
        req = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
        with urllib.request.urlopen(req, timeout=TIMEOUT) as resp:
            final_url = resp.geturl()
            charset = self._get_charset(resp)
            data = resp.read()
        try:
            text = data.decode(charset, errors="replace")
        except LookupError:
            # The server named a codec Python does not know.
            text = data.decode("utf-8", errors="replace")
        return final_url, text

    def _get_charset(self, resp) -> str:
        """Extract the charset from the Content-Type header (default utf-8).

        Handles both ``charset=utf-8`` and the quoted ``charset="utf-8"``
        form.
        """
        ct = resp.headers.get("Content-Type", "")
        m = re.search(r"charset\s*=\s*[\"']?([\w\-]+)", ct, re.IGNORECASE)
        return m.group(1) if m else "utf-8"

    def render(self, url: str, html_text: str):
        """Render *html_text* and return ``(text, links, title)``.

        Also stores the result on ``current_text``, ``current_links`` and
        ``title``. The title falls back to *url* when the page has none.
        """
        renderer = SimpleRenderer(base_url=url)
        try:
            renderer.feed(html_text)
            # close() flushes text the parser is still buffering (for example
            # a trailing "AT&T" that looks like an unfinished entity).
            renderer.close()
        except Exception:
            # Deliberately best-effort: show whatever was parsed so far
            # rather than failing the whole page on malformed markup.
            pass
        text = renderer.get_text()
        self.current_text = text
        self.current_links = renderer.links
        self.title = renderer.title or url
        return text, renderer.links, self.title

    def open(self, url: str):
        """Navigate to *url* and return ``(final_url, text, links, title)``.

        When the resulting URL differs from the current one, the current URL
        is pushed onto the back history and the forward history is cleared.
        """
        final_url, html_text = self.fetch(url)
        text, links, title = self.render(final_url, html_text)
        if self.current_url and self.current_url != final_url:
            self.history_back.append(self.current_url)
            self.history_forward.clear()
        self.current_url = final_url
        return final_url, text, links, title

    def back(self):
        """Go to the previous page; return the page tuple or ``None`` if the
        back history is empty.

        History is only updated after the page loaded, so a failed fetch
        leaves both stacks and ``current_url`` as they were.
        """
        if not self.history_back:
            return None
        leaving = self.current_url
        result = self.open_direct(self.history_back[-1])
        self.history_back.pop()
        self.history_forward.appendleft(leaving)
        return result

    def forward(self):
        """Go to the next page; return the page tuple or ``None`` if the
        forward history is empty.

        The page being left is pushed onto the back history so that Back
        works again afterwards. History is only updated after the page
        loaded successfully.
        """
        if not self.history_forward:
            return None
        leaving = self.current_url
        result = self.open_direct(self.history_forward[0])
        self.history_forward.popleft()
        if leaving:
            self.history_back.append(leaving)
        return result

    def open_direct(self, url: str):
        """Load *url* without touching either history stack.

        Returns ``(final_url, text, links, title)`` and updates
        ``current_url``.
        """
        final_url, html_text = self.fetch(url)
        text, links, title = self.render(final_url, html_text)
        self.current_url = final_url
        return final_url, text, links, title
class PyTkBrowser(tk.Tk):
    """Main window: toolbar, scrollable text page, and status bar.

    All page loading is delegated to ``self.model`` (a ``BrowserModel``) and
    happens synchronously in the event handlers.
    """

    def __init__(self):
        super().__init__()
        self.title("PyTkBrowser")
        self.geometry("900x600")
        self.model = BrowserModel()
        # Names of the per-link tags created for the page currently shown,
        # kept so they can be deleted when the next page is rendered.
        self._link_tags = []
        self._build_ui()

    def _build_ui(self):
        """Create and lay out all widgets and bind events."""
        # Top bar
        top = ttk.Frame(self)
        top.pack(side=tk.TOP, fill=tk.X)
        self.btn_back = ttk.Button(top, text="◀ Back", width=8, command=self.on_back)
        self.btn_forward = ttk.Button(top, text="Forward ▶", width=10, command=self.on_forward)
        self.btn_reload = ttk.Button(top, text="Reload", width=8, command=self.on_reload)
        self.addr_var = tk.StringVar()
        self.addr_entry = ttk.Entry(top, textvariable=self.addr_var)
        self.btn_go = ttk.Button(top, text="Go", width=5, command=self.on_go)
        self.btn_back.pack(side=tk.LEFT, padx=4, pady=4)
        self.btn_forward.pack(side=tk.LEFT, padx=4, pady=4)
        self.btn_reload.pack(side=tk.LEFT, padx=4, pady=4)
        self.addr_entry.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=4, pady=4)
        self.btn_go.pack(side=tk.LEFT, padx=4, pady=4)

        # Content area
        content = ttk.Frame(self)
        content.pack(side=tk.TOP, fill=tk.BOTH, expand=True)
        self.text = tk.Text(content, wrap="word")
        self.text.configure(font=("Helvetica", 12))
        self.text_scroll = ttk.Scrollbar(content, orient=tk.VERTICAL, command=self.text.yview)
        self.text.configure(yscrollcommand=self.text_scroll.set)
        self.text.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        self.text_scroll.pack(side=tk.LEFT, fill=tk.Y)

        # Status bar
        status = ttk.Frame(self)
        status.pack(side=tk.BOTTOM, fill=tk.X)
        self.status_var = tk.StringVar(value="Ready")
        self.status_label = ttk.Label(status, textvariable=self.status_var, anchor="w")
        self.status_label.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=6, pady=3)

        # Shared look and cursor behaviour for every link; the URL-specific
        # click handler lives on a per-link tag created in render_page().
        self.text.tag_configure("link", foreground="#0645AD", underline=True)
        self.text.tag_bind("link", "<Button-1>", self.on_link_click)
        self.text.tag_bind("link", "<Enter>", lambda e: self.text.config(cursor="hand2"))
        self.text.tag_bind("link", "<Leave>", lambda e: self.text.config(cursor=""))

        # Keyboard shortcuts
        self.bind("<Return>", lambda e: self.on_go())
        self.addr_entry.focus_set()

    def set_status(self, msg):
        """Show *msg* in the status bar and repaint immediately."""
        self.status_var.set(msg)
        # Handlers block while fetching, so force the redraw now.
        self.update_idletasks()

    def _show_result(self, result, verb="Loaded"):
        """Display a ``(final_url, text, links, title)`` tuple from the model."""
        final_url, text, links, title = result
        self.addr_var.set(final_url)
        self.render_page(final_url, text, links, title)
        self.set_status(f"{verb}: {final_url}")

    def on_go(self):
        """Load the URL currently typed in the address bar."""
        url = self.addr_var.get().strip()
        if not url:
            return
        self.set_status(f"Loading {url} ...")
        try:
            self._show_result(self.model.open(url))
        except urllib.error.HTTPError as e:
            # HTTPError is a subclass of URLError, so it must come first.
            self.render_error(f"HTTP Error {e.code}: {e.reason}")
        except urllib.error.URLError as e:
            self.render_error(f"URL Error: {e.reason}")
        except Exception as e:
            self.render_error(f"Error: {e}")

    def on_back(self):
        """Navigate one step back in history, if possible."""
        self.set_status("Going back...")
        try:
            result = self.model.back()
        except Exception as e:
            self.render_error(f"Error: {e}")
            return
        if result:
            self._show_result(result)
        else:
            self.set_status("No history.")

    def on_forward(self):
        """Navigate one step forward in history, if possible."""
        self.set_status("Going forward...")
        try:
            result = self.model.forward()
        except Exception as e:
            self.render_error(f"Error: {e}")
            return
        if result:
            self._show_result(result)
        else:
            self.set_status("No forward history.")

    def on_reload(self):
        """Fetch and render the current page again without touching history."""
        if not self.model.current_url:
            self.set_status("No page loaded.")
            return
        self.set_status("Reloading...")
        try:
            self._show_result(
                self.model.open_direct(self.model.current_url), verb="Reloaded"
            )
        except Exception as e:
            self.render_error(f"Error: {e}")

    def _clear_page(self):
        """Empty the text widget and drop the previous page's per-link tags."""
        self.text.delete("1.0", tk.END)
        if self._link_tags:
            # tag_delete also removes the bindings (and the closures holding
            # the old target URLs) attached to those tags.
            self.text.tag_delete(*self._link_tags)
            self._link_tags = []

    def render_error(self, msg):
        """Replace the page content and the status text with *msg*."""
        self._clear_page()
        self.text.insert(tk.END, msg)
        self.set_status(msg)

    def render_page(self, url, text, links, title):
        """Show *text* followed by a clickable, numbered list of *links*.

        *links* is a sequence of ``(label, target_url)`` pairs; entry ``i``
        (1-based) corresponds to the ``[i]`` marker inside *text*.
        """
        self.title(f"PyTkBrowser - {title}")
        self._clear_page()
        # Insert main text
        self.text.insert(tk.END, text + "\n\n")
        if not links:
            self.text.insert(tk.END, "No links found.\n")
            return
        self.text.insert(tk.END, "Links:\n")
        for i, (label, target) in enumerate(links, start=1):
            tag_name = f"link_{i}"
            # Pass the tags to insert() instead of computing index ranges by
            # hand: index(tk.END) points one line past where insert(tk.END)
            # actually writes, so hand-built ranges tag the wrong line.
            self.text.insert(tk.END, f" [{i}] ", (tag_name,))
            self.text.insert(tk.END, label, ("link", tag_name))
            self.text.insert(tk.END, "\n")
            # Bind the target as a default argument so each link keeps its
            # own URL rather than the loop's last one.
            self.text.tag_bind(tag_name, "<Button-1>", lambda e, t=target: self.open_url(t))
            self._link_tags.append(tag_name)

    def on_link_click(self, event):
        """No-op handler for the shared "link" tag.

        Navigation is performed by the per-link tag bindings installed in
        render_page(), which know the target URL.
        """
        pass

    def open_url(self, target):
        """Put *target* into the address bar and load it."""
        self.addr_var.set(target)
        self.on_go()
def main():
    """Create the browser window and run the Tk event loop.

    If a command-line argument is given, it is placed in the address bar and
    loaded before the event loop starts.
    """
    app = PyTkBrowser()
    # Optionally load a URL from command line
    if len(sys.argv) > 1:
        app.addr_var.set(sys.argv[1])
        app.on_go()
    app.mainloop()
# Start the browser only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
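# End of program. (The "No comments: / Post a Comment" text that followed here
# was residue from the web page this file was copied from, not code.)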